source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 5955

Last change on this file since 5955 was 5955, checked in by xwa163, 12 months ago

Init commit for LZ4Grep

File size: 14.8 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7#include <llvm/Support/CommandLine.h>
8#include <llvm/Support/PrettyStackTrace.h>
9
10#include <cc/cc_compiler.h>
11
12#include <lz4FrameDecoder.h>
13#include <kernels/streamset.h>
14#include <kernels/cc_kernel.h>
15#include <kernels/s2p_kernel.h>
16#include <kernels/p2s_kernel.h>
17#include <kernels/source_kernel.h>
18#include <kernels/stdout_kernel.h>
19#include <kernels/lz4/lz4_generate_deposit_stream.h>
20#include <kernels/kernel_builder.h>
21#include <kernels/deletion.h>
22#include <kernels/swizzle.h>
23#include <kernels/pdep_kernel.h>
24#include <kernels/lz4/lz4_multiple_pdep_kernel.h>
25#include <kernels/lz4/lz4_match_copy_kernel.h>
26#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
27#include <kernels/lz4/lz4_block_decoder.h>
28#include <kernels/lz4/lz4_index_builder.h>
29
30namespace re { class CC; }
31
32using namespace llvm;
33using namespace parabix;
34using namespace kernel;
35
36LZ4Generator::LZ4Generator():pxDriver("lz4d") {
37
38}
39
40MainFunctionType LZ4Generator::getMainFunc() {
41    return reinterpret_cast<MainFunctionType>(pxDriver.getMain());
42}
43
44
45
46void LZ4Generator::generateExtractOnlyPipeline(const std::string& outputFile) {
47    auto & iBuilder = pxDriver.getBuilder();
48    this->generateMainFunc(iBuilder);
49
50    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
51
52    // GeneratePipeline
53    this->generateLoadByteStreamAndBitStream(iBuilder);
54
55
56    this->generateExtractAndDepositMarkers(iBuilder);
57
58
59    auto swizzle = this->generateSwizzleExtractData(iBuilder);
60
61
62    // Produce unswizzled bit streams
63    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
64    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
65
66    pxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
67
68
69    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
70    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
71
72    // --------------------------------------------------------
73    // End
74    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
75
76    outK->setInitialArguments({iBuilder->GetString(outputFile)});
77    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
78
79    pxDriver.generatePipelineIR();
80    pxDriver.deallocateBuffers();
81
82    iBuilder->CreateRetVoid();
83
84    pxDriver.finalizeObject();
85}
86
87void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
88    auto & iBuilder = pxDriver.getBuilder();
89    this->generateMainFunc(iBuilder);
90
91    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
92    StreamSetBuffer * const FinalDecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
93
94    // GeneratePipeline
95    this->generateLoadByteStreamAndBitStream(iBuilder);
96    this->generateExtractAndDepositMarkers(iBuilder);
97
98    auto swizzle = this->generateSwizzleExtractData(iBuilder);
99
100    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
101    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
102
103    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
104    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
105
106    // Produce unswizzled bit streams
107    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
108    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
109    pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
110
111//    pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1}, {extractedbits});
112
113    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
114    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
115
116    // --------------------------------------------------------
117    // End
118    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
119    outK->setInitialArguments({iBuilder->GetString(outputFile)});
120    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
121
122    pxDriver.generatePipelineIR();
123    pxDriver.deallocateBuffers();
124
125    iBuilder->CreateRetVoid();
126
127    pxDriver.finalizeObject();
128}
129
130void LZ4Generator::generatePipeline(const std::string& outputFile) {
131    auto & iBuilder = pxDriver.getBuilder();
132    this->generateMainFunc(iBuilder);
133
134    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
135
136    // GeneratePipeline
137    this->generateLoadByteStreamAndBitStream(iBuilder);
138    this->generateExtractAndDepositMarkers(iBuilder);
139
140    auto swizzle = this->generateSwizzleExtractData(iBuilder);
141
142    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
143    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
144
145    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
146    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
147
148
149    StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
150    StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
151
152    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
153    swizzledMatchCopyK->setInitialArguments({fileSize});
154    pxDriver.makeKernelCall(swizzledMatchCopyK, {M0_Start, M0_End, Match_Offset, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
155
156
157    // Produce unswizzled bit streams
158    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
159    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
160    pxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
161
162
163    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
164    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
165
166    // --------------------------------------------------------
167    // End
168    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
169    outK->setInitialArguments({iBuilder->GetString(outputFile)});
170    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
171
172    pxDriver.generatePipelineIR();
173    pxDriver.deallocateBuffers();
174
175    iBuilder->CreateRetVoid();
176
177    pxDriver.finalizeObject();
178}
179
180void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
181    Module * M = iBuilder->getModule();
182    Type * const sizeTy = iBuilder->getSizeTy();
183    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
184    Type * const voidTy = iBuilder->getVoidTy();
185    Type * const inputType = iBuilder->getInt8PtrTy();
186
187    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
188    main->setCallingConv(CallingConv::C);
189    Function::arg_iterator args = main->arg_begin();
190    inputStream = &*(args++);
191    inputStream->setName("input");
192
193    headerSize = &*(args++);
194    headerSize->setName("headerSize");
195
196    fileSize = &*(args++);
197    fileSize->setName("fileSize");
198
199    hasBlockChecksum = &*(args++);
200    hasBlockChecksum->setName("hasBlockChecksum");
201
202    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
203}
204
205void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
206    ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
207    BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
208
209    kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder, iBuilder->getInt8PtrTy());
210    sourceK->setInitialArguments({inputStream, fileSize});
211    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
212    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
213    s2pk->addAttribute(MustConsumeAll());
214    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
215}
216
217void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
218    //// Decode Block Information
219    StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks());
220    StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
221    StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
222
223    //// Generate Helper Markers Extenders, FX, XF
224    StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
225    StreamSetBuffer * const CC_0xFX = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
226    StreamSetBuffer * const CC_0xXF = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
227
228
229    Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
230    extenderK->addAttribute(MustConsumeAll());
231    pxDriver.makeKernelCall(extenderK, {BasisBits}, {Extenders});
232
233
234    Kernel * blockDecoderK = pxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
235    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty()), headerSize, fileSize});
236    pxDriver.makeKernelCall(blockDecoderK, {ByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
237
238//    re::CC* xfCC = re::makeCC(0x0f);
239//    re::CC* fxCC = re::makeCC(0xf0);
240//    for (re::codepoint_t i = 1; i <= 0xf; i++) {
241//        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
242//        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
243//    }
244
245//    Kernel * CC_0xFXKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
246//    pxDriver.makeKernelCall(CC_0xFXKernel, {BasisBits}, {CC_0xFX});
247
248//    Kernel * CC_0xXFKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
249//    pxDriver.makeKernelCall(CC_0xXFKernel, {BasisBits}, {CC_0xXF});
250
251    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
252
253    M0_Start = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
254    M0_End = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
255
256    //TODO handle uncompressed part
257    StreamSetBuffer * const UncompressedStartPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
258    StreamSetBuffer * const UncompressedLength = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
259    StreamSetBuffer * const UncompressedOutputPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
260
261    DeletionMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
262    StreamSetBuffer * const M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
263    DepositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
264    Match_Offset = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
265
266
267
268
269    Kernel* Lz4IndexBuilderK = pxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
270    Lz4IndexBuilderK->setInitialArguments({fileSize});
271    pxDriver.makeKernelCall(
272            Lz4IndexBuilderK,
273            {
274                    ByteStream,
275                    Extenders,
276//                    CC_0xFX,
277//                    CC_0xXF,
278
279                    // Block Data
280                    BlockData_IsCompressed,
281                    BlockData_BlockStart,
282                    BlockData_BlockEnd
283            }, {
284                    //Uncompressed Data
285                    UncompressedStartPos,
286                    UncompressedLength,
287                    UncompressedOutputPos,
288
289                    DeletionMarker,
290                    M0_Start,
291                    M0_End,
292                    Match_Offset,
293                    M0Marker
294            });
295
296    Kernel * generateDepositK = pxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
297    pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker});
298
299}
300
301std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
302    StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
303    StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
304
305    Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 64, 8);
306    pxDriver.makeKernelCall(delK, {DeletionMarker, BasisBits}, {u16Swizzle0, u16Swizzle1});
307    return std::make_pair(u16Swizzle0, u16Swizzle1);
308}
309
310int LZ4Generator::get4MbBufferBlocks() {
311    return 4 * 1024 * 1024 / codegen::BlockSize;
312}
313
314int LZ4Generator::getInputBufferBlocks() {
315    return this->get4MbBufferBlocks() * 2;
316}
317int LZ4Generator::getDecompressedBufferBlocks() {
318    return this->get4MbBufferBlocks() * 2;
319}
320
321
322
323
324// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.