source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 5926

Last change on this file since 5926 was 5926, checked in by xwa163, 15 months ago

Fix lz4 related GEP instructions and TODO

File size: 16.4 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7#include <llvm/Support/CommandLine.h>
8#include <llvm/Support/PrettyStackTrace.h>
9
10#include <cc/cc_compiler.h>
11
12#include <lz4FrameDecoder.h>
13#include <kernels/streamset.h>
14#include <kernels/cc_kernel.h>
15#include <kernels/s2p_kernel.h>
16#include <kernels/p2s_kernel.h>
17#include <kernels/source_kernel.h>
18#include <kernels/stdout_kernel.h>
19#include <kernels/lz4/lz4_extract_e_m0.h>
20#include <kernels/lz4/lz4_generate_deposit_stream.h>
21#include <kernels/lz4/lz4_numbers_to_bitstream_kernel.h>
22#include <kernels/lz4/lz4_bitstream_not_kernel.h>
23#include <kernels/kernel_builder.h>
24#include <kernels/lz4/lz4_block_decoder.h>
25#include <kernels/deletion.h>
26#include <kernels/swizzle.h>
27#include <kernels/pdep_kernel.h>
28#include <kernels/lz4/lz4_multiple_pdep_kernel.h>
29#include <kernels/lz4/lz4_match_copy_kernel.h>
30#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
31#include <kernels/lz4/lz4_block_decoder_new.h>
32#include <kernels/lz4/lz4_index_builder.h>
33
34namespace re { class CC; }
35
36using namespace llvm;
37using namespace parabix;
38using namespace kernel;
39
40LZ4Generator::LZ4Generator():pxDriver("lz4d") {
41
42}
43
44MainFunctionType LZ4Generator::getMainFunc() {
45    return reinterpret_cast<MainFunctionType>(pxDriver.getMain());
46}
47
48
49
50void LZ4Generator::generateExtractOnlyPipeline(const std::string& outputFile) {
51    auto & iBuilder = pxDriver.getBuilder();
52    this->generateMainFunc(iBuilder);
53
54    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
55
56    // GeneratePipeline
57    this->generateLoadByteStreamAndBitStream(iBuilder);
58
59    this->generateExtractAndDepositMarkers(iBuilder);
60
61    auto swizzle = this->generateSwizzleExtractData(iBuilder);
62
63
64    // Produce unswizzled bit streams
65    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
66    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
67
68    pxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
69
70
71    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
72    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
73
74    // --------------------------------------------------------
75    // End
76    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
77
78    outK->setInitialArguments({iBuilder->GetString(outputFile)});
79    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
80
81    pxDriver.generatePipelineIR();
82    pxDriver.deallocateBuffers();
83
84    iBuilder->CreateRetVoid();
85
86    pxDriver.finalizeObject();
87}
88
89void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
90    auto & iBuilder = pxDriver.getBuilder();
91    this->generateMainFunc(iBuilder);
92
93    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
94    StreamSetBuffer * const FinalDecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
95
96
97
98    // GeneratePipeline
99    this->generateLoadByteStreamAndBitStream(iBuilder);
100    this->generateExtractAndDepositMarkers(iBuilder);
101
102    auto swizzle = this->generateSwizzleExtractData(iBuilder);
103
104    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
105    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
106
107    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
108    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
109
110    // Produce unswizzled bit streams
111    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
112    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
113    pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
114
115//    pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1}, {extractedbits});
116
117    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
118    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
119
120    // --------------------------------------------------------
121    // End
122    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
123    outK->setInitialArguments({iBuilder->GetString(outputFile)});
124    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
125
126    pxDriver.generatePipelineIR();
127    pxDriver.deallocateBuffers();
128
129    iBuilder->CreateRetVoid();
130
131    pxDriver.finalizeObject();
132}
133
134void LZ4Generator::generatePipeline(const std::string& outputFile) {
135    auto & iBuilder = pxDriver.getBuilder();
136    this->generateMainFunc(iBuilder);
137
138    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
139//    StreamSetBuffer * const FinalDecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
140
141
142    // GeneratePipeline
143    this->generateLoadByteStreamAndBitStream(iBuilder);
144    this->generateExtractAndDepositMarkers(iBuilder);
145
146    auto swizzle = this->generateSwizzleExtractData(iBuilder);
147
148    //TODO buffer blocks should be decompressedBufferBlocks
149    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
150    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
151
152    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
153    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
154
155
156    StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
157    StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
158
159    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
160    pxDriver.makeKernelCall(swizzledMatchCopyK, {M0_Start, M0_End, Match_Offset, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
161
162
163    // Produce unswizzled bit streams
164    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
165    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
166    pxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
167//    pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
168
169//    pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1}, {extractedbits});
170
171
172    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
173    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
174
175//    Kernel * matchCopyK = pxDriver.addKernelInstance<LZ4MatchCopyKernel>(iBuilder);
176//    pxDriver.makeKernelCall(matchCopyK, {DecompressedByteStream, M0_Start, M0_End, Match_Offset}, {FinalDecompressedByteStream});
177
178    // --------------------------------------------------------
179    // End
180    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
181    outK->setInitialArguments({iBuilder->GetString(outputFile)});
182    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
183//    pxDriver.makeKernelCall(outK, {FinalDecompressedByteStream}, {});
184
185    pxDriver.generatePipelineIR();
186    pxDriver.deallocateBuffers();
187
188    iBuilder->CreateRetVoid();
189
190    pxDriver.finalizeObject();
191}
192
193void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
194    Module * M = iBuilder->getModule();
195    Type * const sizeTy = iBuilder->getSizeTy();
196    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
197    Type * const voidTy = iBuilder->getVoidTy();
198    Type * const inputType = iBuilder->getInt8PtrTy();
199
200    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
201    main->setCallingConv(CallingConv::C);
202    Function::arg_iterator args = main->arg_begin();
203    inputStream = &*(args++);
204    inputStream->setName("input");
205
206    headerSize = &*(args++);
207    headerSize->setName("headerSize");
208
209    fileSize = &*(args++);
210    fileSize->setName("fileSize");
211
212    hasBlockChecksum = &*(args++);
213    hasBlockChecksum->setName("hasBlockChecksum");
214
215    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
216}
217
218void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
219    ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
220    BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
221
222
223    kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder, iBuilder->getInt8PtrTy());
224    sourceK->setInitialArguments({inputStream, fileSize});
225    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
226    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
227    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
228}
229
230void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
231    //// Decode Block Information
232    StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
233    StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
234    StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
235
236    Kernel * blockDecoderK = pxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
237    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty()), headerSize});
238    pxDriver.makeKernelCall(blockDecoderK, {ByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
239
240
241    //// Generate Helper Markers Extenders, FX, XF
242    StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
243    StreamSetBuffer * const CC_0xFX = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
244    StreamSetBuffer * const CC_0xXF = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
245
246
247    Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
248    pxDriver.makeKernelCall(extenderK, {BasisBits}, {Extenders});
249
250    re::CC* xfCC = re::makeCC(0x0f);
251    re::CC* fxCC = re::makeCC(0xf0);
252    for (re::codepoint_t i = 1; i <= 0xf; i++) {
253        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
254        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
255    }
256
257    Kernel * CC_0xFXKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
258    pxDriver.makeKernelCall(CC_0xFXKernel, {BasisBits}, {CC_0xFX});
259
260    Kernel * CC_0xXFKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
261    pxDriver.makeKernelCall(CC_0xXFKernel, {BasisBits}, {CC_0xXF});
262
263
264    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
265
266    size_t m0BufferSize = this->getDecompressedBufferBlocks() * 2;
267    size_t e1BufferSize = this->getInputBufferBlocks();
268
269    M0_Start = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
270    M0_End = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
271
272    //TODO handle uncompressed part
273    StreamSetBuffer * const UncompressedStartPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
274    StreamSetBuffer * const UncompressedLength = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
275    StreamSetBuffer * const UncompressedOutputPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
276
277    EMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), e1BufferSize);
278    StreamSetBuffer * const M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), m0BufferSize);
279    DepositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), m0BufferSize);
280    Match_Offset = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
281
282
283
284    std::map<std::string, size_t> m_empty = {};
285
286    Kernel * extractEM0K = pxDriver.addKernelInstance<LZ4ExtractEM0Kernel>(iBuilder, m_empty);
287
288    pxDriver.makeKernelCall(
289            extractEM0K,
290            {
291                    ByteStream,
292                    Extenders,
293                    CC_0xFX,
294                    CC_0xXF,
295
296                    // Block Data
297                    BlockData_IsCompressed,
298                    BlockData_BlockStart,
299                    BlockData_BlockEnd
300            }, {
301                    //Uncompressed Data
302                    UncompressedStartPos,
303                    UncompressedLength,
304                    UncompressedOutputPos,
305
306                    EMarker,
307                    M0_Start,
308                    M0_End,
309                    Match_Offset
310            });
311
312
313
314    Kernel * buildM0StartMarkerK = pxDriver.addKernelInstance<LZ4NumbersToBitstreamKernel>("buildM0Marker", iBuilder);
315    pxDriver.makeKernelCall(buildM0StartMarkerK, {M0_Start, M0_End}, {M0Marker});
316
317
318    Kernel * generateDepositK = pxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
319    pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker});
320
321}
322
323std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
324    StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
325    StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
326
327
328    StreamSetBuffer * const DeletionMask = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
329
330    Kernel * ExtractToDeletionMaskK = pxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
331    pxDriver.makeKernelCall(
332            ExtractToDeletionMaskK,
333            {
334                    EMarker
335            }, {
336                    DeletionMask
337            }
338    );
339
340    Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 64, 8);
341    pxDriver.makeKernelCall(delK, {DeletionMask, BasisBits}, {u16Swizzle0, u16Swizzle1});
342    return std::make_pair(u16Swizzle0, u16Swizzle1);
343}
344
345int LZ4Generator::getInputBufferBlocks() {
346    const int segmentSize = codegen::SegmentSize;
347    const int bufferSegments = codegen::BufferSegments * codegen::ThreadNum * 8 * 16 * 32 * 2;
348    return segmentSize * bufferSegments * 16;
349}
350
351int LZ4Generator::getDecompressedBufferBlocks() {
352    const unsigned copyBackWindowBlocks = 256U * 256U / codegen::BlockSize;
353    // At least * 2 since we need to leave 1 window as source of match copy,
354    // while the other window as the destination for match copy
355    const unsigned decompressBufBlocks = copyBackWindowBlocks * 2;
356    return decompressBufBlocks;
357}
358
359
360
361
362// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.