source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6008

Last change on this file since 6008 was 6008, checked in by nmedfort, 12 months ago

Removed temporary buffers from pipeline and placed them in the source kernels.

File size: 14.4 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7#include <llvm/Support/CommandLine.h>
8#include <llvm/Support/PrettyStackTrace.h>
9
10#include <cc/cc_compiler.h>
11
12#include <lz4FrameDecoder.h>
13#include <kernels/streamset.h>
14#include <kernels/cc_kernel.h>
15#include <kernels/s2p_kernel.h>
16#include <kernels/p2s_kernel.h>
17#include <kernels/source_kernel.h>
18#include <kernels/stdout_kernel.h>
19#include <kernels/lz4/lz4_generate_deposit_stream.h>
20#include <kernels/kernel_builder.h>
21#include <kernels/deletion.h>
22#include <kernels/swizzle.h>
23#include <kernels/pdep_kernel.h>
24#include <kernels/lz4/lz4_multiple_pdep_kernel.h>
25#include <kernels/lz4/lz4_match_copy_kernel.h>
26#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
27#include <kernels/lz4/lz4_block_decoder.h>
28#include <kernels/lz4/lz4_index_builder.h>
29
30namespace re { class CC; }
31
32using namespace llvm;
33using namespace parabix;
34using namespace kernel;
35
36LZ4Generator::LZ4Generator():pxDriver("lz4d") {
37
38}
39
40MainFunctionType LZ4Generator::getMainFunc() {
41    return reinterpret_cast<MainFunctionType>(pxDriver.getMain());
42}
43
44
45
46void LZ4Generator::generateExtractOnlyPipeline(const std::string& outputFile) {
47    auto & iBuilder = pxDriver.getBuilder();
48    this->generateMainFunc(iBuilder);
49
50    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
51
52    // GeneratePipeline
53    this->generateLoadByteStreamAndBitStream(iBuilder);
54
55
56    this->generateExtractAndDepositMarkers(iBuilder);
57
58
59    auto swizzle = this->generateSwizzleExtractData(iBuilder);
60
61
62    // Produce unswizzled bit streams
63    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
64    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
65
66    pxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
67
68
69    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
70    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
71
72    // --------------------------------------------------------
73    // End
74    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
75
76    outK->setInitialArguments({iBuilder->GetString(outputFile)});
77    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
78
79    pxDriver.generatePipelineIR();
80    pxDriver.deallocateBuffers();
81
82    iBuilder->CreateRetVoid();
83
84    pxDriver.finalizeObject();
85}
86
87void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
88    auto & iBuilder = pxDriver.getBuilder();
89    this->generateMainFunc(iBuilder);
90
91    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
92
93    // GeneratePipeline
94    this->generateLoadByteStreamAndBitStream(iBuilder);
95    this->generateExtractAndDepositMarkers(iBuilder);
96
97    auto swizzle = this->generateSwizzleExtractData(iBuilder);
98
99    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
100    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
101
102    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
103    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
104
105    // Produce unswizzled bit streams
106    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
107    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
108    pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
109
110    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
111    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
112
113    // --------------------------------------------------------
114    // End
115    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
116    outK->setInitialArguments({iBuilder->GetString(outputFile)});
117    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
118
119    pxDriver.generatePipelineIR();
120    pxDriver.deallocateBuffers();
121
122    iBuilder->CreateRetVoid();
123
124    pxDriver.finalizeObject();
125}
126
127void LZ4Generator::generatePipeline(const std::string& outputFile) {
128    auto & iBuilder = pxDriver.getBuilder();
129    this->generateMainFunc(iBuilder);
130
131    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
132
133    // GeneratePipeline
134    this->generateLoadByteStreamAndBitStream(iBuilder);
135    this->generateExtractAndDepositMarkers(iBuilder);
136
137    auto swizzle = this->generateSwizzleExtractData(iBuilder);
138
139    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
140    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
141
142    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
143    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
144
145
146    StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
147    StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
148
149    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
150    pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, M0CountMarker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
151
152
153    // Produce unswizzled bit streams
154    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
155    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
156    pxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
157
158
159    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
160    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
161
162    // --------------------------------------------------------
163    // End
164    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
165    outK->setInitialArguments({iBuilder->GetString(outputFile)});
166    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
167
168    pxDriver.generatePipelineIR();
169    pxDriver.deallocateBuffers();
170
171    iBuilder->CreateRetVoid();
172
173    pxDriver.finalizeObject();
174}
175
176void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
177    Module * M = iBuilder->getModule();
178    Type * const sizeTy = iBuilder->getSizeTy();
179    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
180    Type * const voidTy = iBuilder->getVoidTy();
181    Type * const inputType = iBuilder->getInt8PtrTy();
182
183    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
184    main->setCallingConv(CallingConv::C);
185    Function::arg_iterator args = main->arg_begin();
186    inputStream = &*(args++);
187    inputStream->setName("input");
188
189    headerSize = &*(args++);
190    headerSize->setName("headerSize");
191
192    fileSize = &*(args++);
193    fileSize->setName("fileSize");
194
195    hasBlockChecksum = &*(args++);
196    hasBlockChecksum->setName("hasBlockChecksum");
197
198    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
199}
200
201void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
202    ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
203    BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
204
205    kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
206    sourceK->setInitialArguments({inputStream, fileSize});
207    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
208    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
209//    s2pk->addAttribute(MustConsumeAll());
210    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
211}
212
213void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
214    //// Decode Block Information
215    StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
216    StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
217    StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
218
219    //// Generate Helper Markers Extenders, FX, XF
220    StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
221    MatchOffsetMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
222        // FX and XF streams will be added to IndexBuilderKernel in the future
223//    StreamSetBuffer * const CC_0xFX = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
224//    StreamSetBuffer * const CC_0xXF = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
225
226    Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
227//    extenderK->addAttribute(MustConsumeAll());
228    pxDriver.makeKernelCall(extenderK, {BasisBits}, {Extenders});
229
230
231    Kernel * blockDecoderK = pxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
232    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty()), headerSize, fileSize});
233    pxDriver.makeKernelCall(blockDecoderK, {ByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
234
235//    re::CC* xfCC = re::makeCC(0x0f);
236//    re::CC* fxCC = re::makeCC(0xf0);
237//    for (re::codepoint_t i = 1; i <= 0xf; i++) {
238//        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
239//        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
240//    }
241
242//    Kernel * CC_0xFXKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
243//    pxDriver.makeKernelCall(CC_0xFXKernel, {BasisBits}, {CC_0xFX});
244
245//    Kernel * CC_0xXFKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
246//    pxDriver.makeKernelCall(CC_0xXFKernel, {BasisBits}, {CC_0xXF});
247
248    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
249
250    //TODO handle uncompressed part
251    StreamSetBuffer * const UncompressedStartPos = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
252    StreamSetBuffer * const UncompressedLength = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
253    StreamSetBuffer * const UncompressedOutputPos = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
254
255    DeletionMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
256    M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
257    M0CountMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
258    DepositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
259
260    Kernel* Lz4IndexBuilderK = pxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
261    Lz4IndexBuilderK->setInitialArguments({fileSize});
262    pxDriver.makeKernelCall(
263            Lz4IndexBuilderK,
264            {
265                    ByteStream,
266                    Extenders,
267//                    CC_0xFX,
268//                    CC_0xXF,
269
270                    // Block Data
271                    BlockData_IsCompressed,
272                    BlockData_BlockStart,
273                    BlockData_BlockEnd
274            }, {
275                    //Uncompressed Data
276                    UncompressedStartPos,
277                    UncompressedLength,
278                    UncompressedOutputPos,
279
280                    DeletionMarker,
281                    M0Marker,
282                    M0CountMarker,
283                    MatchOffsetMarker
284            });
285
286    Kernel * generateDepositK = pxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
287    pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker});
288
289}
290
291std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
292    StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
293    StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
294
295    Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
296    pxDriver.makeKernelCall(delK, {DeletionMarker, BasisBits}, {u16Swizzle0, u16Swizzle1});
297    return std::make_pair(u16Swizzle0, u16Swizzle1);
298}
299
300int LZ4Generator::get4MbBufferBlocks() {
301    return 4 * 1024 * 1024 / codegen::BlockSize;
302}
303
304int LZ4Generator::getInputBufferBlocks() {
305    return this->get4MbBufferBlocks() * 2;
306}
307int LZ4Generator::getDecompressedBufferBlocks() {
308    return this->get4MbBufferBlocks() * 2;
309}
310
311
312
313
314// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.