source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 5864

Last change on this file since 5864 was 5864, checked in by xwa163, 15 months ago

Add LZ4D extract deposit related kernel, target and test cases

File size: 13.3 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7#include <llvm/Support/CommandLine.h>
8#include <llvm/Support/PrettyStackTrace.h>
9
10#include <cc/cc_compiler.h>
11
12#include <lz4FrameDecoder.h>
13#include <kernels/streamset.h>
14#include <kernels/cc_kernel.h>
15#include <kernels/s2p_kernel.h>
16#include <kernels/p2s_kernel.h>
17#include <kernels/source_kernel.h>
18#include <kernels/stdout_kernel.h>
19#include <kernels/lz4/lz4_extract_e_m0.h>
20#include <kernels/lz4/lz4_generate_deposit_stream.h>
21#include <kernels/lz4/lz4_numbers_to_bitstream_kernel.h>
22//#include <kernels/LZ4MarkerToMaskKernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/kernel_builder.h>
25#include <kernels/lz4/lz4_block_decoder.h>
26#include <kernels/deletion.h>
27#include <kernels/swizzle.h>
28#include <kernels/pdep_kernel.h>
29#include <kernels/lz4/lz4_match_copy_kernel.h>
30
31namespace re { class CC; }
32
33using namespace llvm;
34using namespace parabix;
35using namespace kernel;
36
37LZ4Generator::LZ4Generator():pxDriver("lz4d") {
38
39}
40
41MainFunctionType LZ4Generator::getMainFunc() {
42    return reinterpret_cast<MainFunctionType>(pxDriver.getMain());
43}
44
45
46
47void LZ4Generator::generateExtractOnlyPipeline(const std::string& outputFile) {
48    auto & iBuilder = pxDriver.getBuilder();
49    this->generateMainFunc(iBuilder);
50
51    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
52
53    // GeneratePipeline
54    this->generateLoadByteStreamAndBitStream(iBuilder);
55
56
57    this->generateExtractAndDepositMarkers(iBuilder);
58
59    auto swizzle = this->generateSwizzleExtractData(iBuilder);
60
61
62    // Produce unswizzled bit streams
63    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
64    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
65
66    pxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
67
68
69    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
70    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
71
72    // --------------------------------------------------------
73    // End
74    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
75
76    outK->setInitialArguments({iBuilder->GetString(outputFile)});
77    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
78
79    pxDriver.generatePipelineIR();
80    pxDriver.deallocateBuffers();
81
82    iBuilder->CreateRetVoid();
83
84    pxDriver.finalizeObject();
85}
86
87void LZ4Generator::generatePipeline(const std::string& outputFile) {
88    auto & iBuilder = pxDriver.getBuilder();
89    this->generateMainFunc(iBuilder);
90
91    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
92    StreamSetBuffer * const FinalDecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
93
94
95
96    // GeneratePipeline
97    this->generateLoadByteStreamAndBitStream(iBuilder);
98    this->generateExtractAndDepositMarkers(iBuilder);
99
100    auto swizzle = this->generateSwizzleExtractData(iBuilder);
101
102    StreamSetBuffer * depositedSwizzle0 = this->generateDepositData(iBuilder, swizzle.first);
103    StreamSetBuffer * depositedSwizzle1 = this->generateDepositData(iBuilder, swizzle.second);
104
105    // Produce unswizzled bit streams
106    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
107    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
108    pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
109
110//    pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1}, {extractedbits});
111
112    // TODO MatchCopy before p2s
113
114    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
115    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
116
117//    Kernel * matchCopyK = pxDriver.addKernelInstance<LZ4MatchCopyKernel>(iBuilder);
118//    pxDriver.makeKernelCall(matchCopyK, {DecompressedByteStream, M0_Start, M0_End, Match_Offset}, {FinalDecompressedByteStream});
119
120    // --------------------------------------------------------
121    // End
122    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
123    outK->setInitialArguments({iBuilder->GetString(outputFile)});
124    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
125
126    pxDriver.generatePipelineIR();
127    pxDriver.deallocateBuffers();
128
129    iBuilder->CreateRetVoid();
130
131    pxDriver.finalizeObject();
132}
133
134void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
135    Module * M = iBuilder->getModule();
136    Type * const sizeTy = iBuilder->getSizeTy();
137    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
138    Type * const voidTy = iBuilder->getVoidTy();
139    Type * const inputType = iBuilder->getInt8PtrTy();
140
141    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
142    main->setCallingConv(CallingConv::C);
143    Function::arg_iterator args = main->arg_begin();
144    inputStream = &*(args++);
145    inputStream->setName("input");
146
147    headerSize = &*(args++);
148    headerSize->setName("headerSize");
149
150    fileSize = &*(args++);
151    fileSize->setName("fileSize");
152
153    hasBlockChecksum = &*(args++);
154    hasBlockChecksum->setName("hasBlockChecksum");
155
156    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
157}
158
159void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
160    ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
161    BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
162
163
164    kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder, iBuilder->getInt8PtrTy());
165    sourceK->setInitialArguments({inputStream, fileSize});
166    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
167    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
168    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
169}
170
171void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
172    //// Decode Block Information
173    StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
174    StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
175    StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
176
177    Kernel * blockDecoderK = pxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
178    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty()), headerSize});
179    pxDriver.makeKernelCall(blockDecoderK, {ByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
180
181
182    //// Generate Helper Markers Extenders, FX, XF
183    StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
184    StreamSetBuffer * const CC_0xFX = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
185    StreamSetBuffer * const CC_0xXF = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
186
187
188    Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
189    pxDriver.makeKernelCall(extenderK, {BasisBits}, {Extenders});
190
191    re::CC* xfCC = re::makeCC(0x0f);
192    re::CC* fxCC = re::makeCC(0xf0);
193    for (re::codepoint_t i = 1; i <= 0xf; i++) {
194        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
195        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
196    }
197
198    Kernel * CC_0xFXKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
199    pxDriver.makeKernelCall(CC_0xFXKernel, {BasisBits}, {CC_0xFX});
200
201    Kernel * CC_0xXFKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
202    pxDriver.makeKernelCall(CC_0xXFKernel, {BasisBits}, {CC_0xXF});
203
204
205    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
206
207    size_t m0BufferSize = this->getDecompressedBufferBlocks() * 2;
208    size_t e1BufferSize = this->getInputBufferBlocks();
209
210    M0_Start = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
211    M0_End = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
212
213    //TODO handle uncompressed part
214    StreamSetBuffer * const UncompressedStartPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
215    StreamSetBuffer * const UncompressedLength = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
216    StreamSetBuffer * const UncompressedOutputPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
217
218    EMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), e1BufferSize);
219    StreamSetBuffer * const M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), m0BufferSize);
220    DepositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), m0BufferSize);
221    Match_Offset = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
222
223
224
225    std::map<std::string, size_t> m_empty = {};
226
227    Kernel * extractEM0K = pxDriver.addKernelInstance<LZ4ExtractEM0Kernel>(iBuilder, m_empty);
228
229    pxDriver.makeKernelCall(
230            extractEM0K,
231            {
232                    ByteStream,
233                    Extenders,
234                    CC_0xFX,
235                    CC_0xXF,
236
237                    // Block Data
238                    BlockData_IsCompressed,
239                    BlockData_BlockStart,
240                    BlockData_BlockEnd
241            }, {
242                    //Uncompressed Data
243                    UncompressedStartPos,
244                    UncompressedLength,
245                    UncompressedOutputPos,
246
247                    EMarker,
248                    M0_Start,
249                    M0_End,
250                    Match_Offset
251            });
252
253
254
255    Kernel * buildM0StartMarkerK = pxDriver.addKernelInstance<LZ4NumbersToBitstreamKernel>("buildM0Marker", iBuilder);
256    pxDriver.makeKernelCall(buildM0StartMarkerK, {M0_Start, M0_End}, {M0Marker});
257
258
259    Kernel * generateDepositK = pxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
260    pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker}); // TODO deposit
261
262}
263
264StreamSetBuffer* LZ4Generator::generateDepositData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, StreamSetBuffer* swizzleBuffer) {
265    //TODO buffer blocks here may be incorrect
266    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1); //TODO buffer blocks here may be incorrect
267    Kernel * pdep1K = pxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4, 4);
268    pxDriver.makeKernelCall(pdep1K, {DepositMarker, swizzleBuffer}, {depositedSwizzle0});
269    return depositedSwizzle0;
270}
271
272std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
273    StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
274    StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
275
276
277    StreamSetBuffer * const DeletionMask = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
278
279    Kernel * ExtractToDeletionMaskK = pxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
280    pxDriver.makeKernelCall(
281            ExtractToDeletionMaskK,
282            {
283                    EMarker
284            }, {
285                    DeletionMask
286            }
287    );
288
289    Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 64, 8);
290    pxDriver.makeKernelCall(delK, {DeletionMask, BasisBits}, {u16Swizzle0, u16Swizzle1});
291    return std::make_pair(u16Swizzle0, u16Swizzle1);
292}
293
294int LZ4Generator::getInputBufferBlocks() {
295    const int segmentSize = codegen::SegmentSize;
296    const int bufferSegments = codegen::BufferSegments * codegen::ThreadNum * 8 * 16 * 32 * 2;
297    return segmentSize * bufferSegments * 16;
298}
299
300int LZ4Generator::getDecompressedBufferBlocks() {
301    const unsigned decompressBufBlocks = 256U * 256U / codegen::BlockSize * 2 * 2;
302    return decompressBufBlocks;
303}
304
305
306
307// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.