source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 5874

Last change on this file since 5874 was 5874, checked in by xwa163, 12 months ago
  1. Add Debug option “extract-and-deposit-only” for lz4_ext_dep
  2. Use MultiplePdepKernel in LZ4Generator
File size: 15.4 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7#include <llvm/Support/CommandLine.h>
8#include <llvm/Support/PrettyStackTrace.h>
9
10#include <cc/cc_compiler.h>
11
12#include <lz4FrameDecoder.h>
13#include <kernels/streamset.h>
14#include <kernels/cc_kernel.h>
15#include <kernels/s2p_kernel.h>
16#include <kernels/p2s_kernel.h>
17#include <kernels/source_kernel.h>
18#include <kernels/stdout_kernel.h>
19#include <kernels/lz4/lz4_extract_e_m0.h>
20#include <kernels/lz4/lz4_generate_deposit_stream.h>
21#include <kernels/lz4/lz4_numbers_to_bitstream_kernel.h>
22//#include <kernels/LZ4MarkerToMaskKernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/kernel_builder.h>
25#include <kernels/lz4/lz4_block_decoder.h>
26#include <kernels/deletion.h>
27#include <kernels/swizzle.h>
28#include <kernels/pdep_kernel.h>
29#include <kernels/lz4/lz4_multiple_pdep_kernel.h>
30#include <kernels/lz4/lz4_match_copy_kernel.h>
31
32namespace re { class CC; }
33
34using namespace llvm;
35using namespace parabix;
36using namespace kernel;
37
38LZ4Generator::LZ4Generator():pxDriver("lz4d") {
39
40}
41
42MainFunctionType LZ4Generator::getMainFunc() {
43    return reinterpret_cast<MainFunctionType>(pxDriver.getMain());
44}
45
46
47
48void LZ4Generator::generateExtractOnlyPipeline(const std::string& outputFile) {
49    auto & iBuilder = pxDriver.getBuilder();
50    this->generateMainFunc(iBuilder);
51
52    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
53
54    // GeneratePipeline
55    this->generateLoadByteStreamAndBitStream(iBuilder);
56
57    this->generateExtractAndDepositMarkers(iBuilder);
58
59    auto swizzle = this->generateSwizzleExtractData(iBuilder);
60
61
62    // Produce unswizzled bit streams
63    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
64    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
65
66    pxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
67
68
69    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
70    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
71
72    // --------------------------------------------------------
73    // End
74    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
75
76    outK->setInitialArguments({iBuilder->GetString(outputFile)});
77    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
78
79    pxDriver.generatePipelineIR();
80    pxDriver.deallocateBuffers();
81
82    iBuilder->CreateRetVoid();
83
84    pxDriver.finalizeObject();
85}
86
87void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
88    auto & iBuilder = pxDriver.getBuilder();
89    this->generateMainFunc(iBuilder);
90
91    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
92    StreamSetBuffer * const FinalDecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
93
94
95
96    // GeneratePipeline
97    this->generateLoadByteStreamAndBitStream(iBuilder);
98    this->generateExtractAndDepositMarkers(iBuilder);
99
100    auto swizzle = this->generateSwizzleExtractData(iBuilder);
101
102    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
103    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
104
105    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
106    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
107
108    // Produce unswizzled bit streams
109    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
110    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
111    pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
112
113//    pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1}, {extractedbits});
114
115    // TODO MatchCopy before p2s
116
117    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
118    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
119
120    // --------------------------------------------------------
121    // End
122    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
123    outK->setInitialArguments({iBuilder->GetString(outputFile)});
124    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
125
126    pxDriver.generatePipelineIR();
127    pxDriver.deallocateBuffers();
128
129    iBuilder->CreateRetVoid();
130
131    pxDriver.finalizeObject();
132}
133
134void LZ4Generator::generatePipeline(const std::string& outputFile) {
135    auto & iBuilder = pxDriver.getBuilder();
136    this->generateMainFunc(iBuilder);
137
138    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
139    StreamSetBuffer * const FinalDecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
140
141
142
143    // GeneratePipeline
144    this->generateLoadByteStreamAndBitStream(iBuilder);
145    this->generateExtractAndDepositMarkers(iBuilder);
146
147    auto swizzle = this->generateSwizzleExtractData(iBuilder);
148
149    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
150    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
151
152    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
153    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
154
155    // Produce unswizzled bit streams
156    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
157    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
158    pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
159
160//    pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1}, {extractedbits});
161
162    // TODO MatchCopy before p2s
163
164    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
165    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
166
167    Kernel * matchCopyK = pxDriver.addKernelInstance<LZ4MatchCopyKernel>(iBuilder);
168    pxDriver.makeKernelCall(matchCopyK, {DecompressedByteStream, M0_Start, M0_End, Match_Offset}, {FinalDecompressedByteStream});
169
170    // --------------------------------------------------------
171    // End
172    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
173    outK->setInitialArguments({iBuilder->GetString(outputFile)});
174    pxDriver.makeKernelCall(outK, {FinalDecompressedByteStream}, {});
175
176    pxDriver.generatePipelineIR();
177    pxDriver.deallocateBuffers();
178
179    iBuilder->CreateRetVoid();
180
181    pxDriver.finalizeObject();
182}
183
184void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
185    Module * M = iBuilder->getModule();
186    Type * const sizeTy = iBuilder->getSizeTy();
187    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
188    Type * const voidTy = iBuilder->getVoidTy();
189    Type * const inputType = iBuilder->getInt8PtrTy();
190
191    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
192    main->setCallingConv(CallingConv::C);
193    Function::arg_iterator args = main->arg_begin();
194    inputStream = &*(args++);
195    inputStream->setName("input");
196
197    headerSize = &*(args++);
198    headerSize->setName("headerSize");
199
200    fileSize = &*(args++);
201    fileSize->setName("fileSize");
202
203    hasBlockChecksum = &*(args++);
204    hasBlockChecksum->setName("hasBlockChecksum");
205
206    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
207}
208
209void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
210    ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
211    BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
212
213
214    kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder, iBuilder->getInt8PtrTy());
215    sourceK->setInitialArguments({inputStream, fileSize});
216    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
217    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
218    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
219}
220
221void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
222    //// Decode Block Information
223    StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
224    StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
225    StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
226
227    Kernel * blockDecoderK = pxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
228    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty()), headerSize});
229    pxDriver.makeKernelCall(blockDecoderK, {ByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
230
231
232    //// Generate Helper Markers Extenders, FX, XF
233    StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
234    StreamSetBuffer * const CC_0xFX = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
235    StreamSetBuffer * const CC_0xXF = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
236
237
238    Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
239    pxDriver.makeKernelCall(extenderK, {BasisBits}, {Extenders});
240
241    re::CC* xfCC = re::makeCC(0x0f);
242    re::CC* fxCC = re::makeCC(0xf0);
243    for (re::codepoint_t i = 1; i <= 0xf; i++) {
244        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
245        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
246    }
247
248    Kernel * CC_0xFXKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
249    pxDriver.makeKernelCall(CC_0xFXKernel, {BasisBits}, {CC_0xFX});
250
251    Kernel * CC_0xXFKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
252    pxDriver.makeKernelCall(CC_0xXFKernel, {BasisBits}, {CC_0xXF});
253
254
255    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
256
257    size_t m0BufferSize = this->getDecompressedBufferBlocks() * 2;
258    size_t e1BufferSize = this->getInputBufferBlocks();
259
260    M0_Start = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
261    M0_End = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
262
263    //TODO handle uncompressed part
264    StreamSetBuffer * const UncompressedStartPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
265    StreamSetBuffer * const UncompressedLength = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
266    StreamSetBuffer * const UncompressedOutputPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
267
268    EMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), e1BufferSize);
269    StreamSetBuffer * const M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), m0BufferSize);
270    DepositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), m0BufferSize);
271    Match_Offset = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
272
273
274
275    std::map<std::string, size_t> m_empty = {};
276
277    Kernel * extractEM0K = pxDriver.addKernelInstance<LZ4ExtractEM0Kernel>(iBuilder, m_empty);
278
279    pxDriver.makeKernelCall(
280            extractEM0K,
281            {
282                    ByteStream,
283                    Extenders,
284                    CC_0xFX,
285                    CC_0xXF,
286
287                    // Block Data
288                    BlockData_IsCompressed,
289                    BlockData_BlockStart,
290                    BlockData_BlockEnd
291            }, {
292                    //Uncompressed Data
293                    UncompressedStartPos,
294                    UncompressedLength,
295                    UncompressedOutputPos,
296
297                    EMarker,
298                    M0_Start,
299                    M0_End,
300                    Match_Offset
301            });
302
303
304
305    Kernel * buildM0StartMarkerK = pxDriver.addKernelInstance<LZ4NumbersToBitstreamKernel>("buildM0Marker", iBuilder);
306    pxDriver.makeKernelCall(buildM0StartMarkerK, {M0_Start, M0_End}, {M0Marker});
307
308
309    Kernel * generateDepositK = pxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
310    pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker}); // TODO deposit
311
312}
313
314std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
315    StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
316    StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
317
318
319    StreamSetBuffer * const DeletionMask = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
320
321    Kernel * ExtractToDeletionMaskK = pxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
322    pxDriver.makeKernelCall(
323            ExtractToDeletionMaskK,
324            {
325                    EMarker
326            }, {
327                    DeletionMask
328            }
329    );
330
331    Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 64, 8);
332    pxDriver.makeKernelCall(delK, {DeletionMask, BasisBits}, {u16Swizzle0, u16Swizzle1});
333    return std::make_pair(u16Swizzle0, u16Swizzle1);
334}
335
336int LZ4Generator::getInputBufferBlocks() {
337    const int segmentSize = codegen::SegmentSize;
338    const int bufferSegments = codegen::BufferSegments * codegen::ThreadNum * 8 * 16 * 32 * 2;
339    return segmentSize * bufferSegments * 16;
340}
341
342int LZ4Generator::getDecompressedBufferBlocks() {
343    const unsigned decompressBufBlocks = 256U * 256U / codegen::BlockSize * 2 * 2;
344    return decompressBufBlocks;
345}
346
347
348
349// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.