source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 5957

Last change on this file since 5957 was 5957, checked in by nmedfort, 16 months ago

fixes for minor compilation issues

File size: 13.6 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7#include <llvm/Support/CommandLine.h>
8#include <llvm/Support/PrettyStackTrace.h>
9
10#include <cc/cc_compiler.h>
11
12#include <lz4FrameDecoder.h>
13#include <kernels/streamset.h>
14#include <kernels/cc_kernel.h>
15#include <kernels/s2p_kernel.h>
16#include <kernels/p2s_kernel.h>
17#include <kernels/source_kernel.h>
18#include <kernels/stdout_kernel.h>
19#include <kernels/lz4/lz4_generate_deposit_stream.h>
20#include <kernels/kernel_builder.h>
21#include <kernels/deletion.h>
22#include <kernels/swizzle.h>
23#include <kernels/pdep_kernel.h>
24#include <kernels/lz4/lz4_multiple_pdep_kernel.h>
25#include <kernels/lz4/lz4_match_copy_kernel.h>
26#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
27#include <kernels/lz4/lz4_block_decoder.h>
28#include <kernels/lz4/lz4_index_builder.h>
29
30namespace re { class CC; }
31
32using namespace llvm;
33using namespace parabix;
34using namespace kernel;
35
36LZ4Generator::LZ4Generator():pxDriver("lz4d") {
37
38}
39
40MainFunctionType LZ4Generator::getMainFunc() {
41    return reinterpret_cast<MainFunctionType>(pxDriver.getMain());
42}
43
44
45
46void LZ4Generator::generateExtractOnlyPipeline(const std::string& outputFile) {
47    auto & iBuilder = pxDriver.getBuilder();
48    this->generateMainFunc(iBuilder);
49
50    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
51
52    // GeneratePipeline
53    this->generateLoadByteStreamAndBitStream(iBuilder);
54
55
56    this->generateExtractAndDepositMarkers(iBuilder);
57
58
59    auto swizzle = this->generateSwizzleExtractData(iBuilder);
60
61
62    // Produce unswizzled bit streams
63    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
64    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
65
66    pxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
67
68
69    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
70    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
71
72    // --------------------------------------------------------
73    // End
74    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
75
76    outK->setInitialArguments({iBuilder->GetString(outputFile)});
77    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
78
79    pxDriver.generatePipelineIR();
80    pxDriver.deallocateBuffers();
81
82    iBuilder->CreateRetVoid();
83
84    pxDriver.finalizeObject();
85}
86
87void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
88    auto & iBuilder = pxDriver.getBuilder();
89    this->generateMainFunc(iBuilder);
90
91    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
92
93    // GeneratePipeline
94    this->generateLoadByteStreamAndBitStream(iBuilder);
95    this->generateExtractAndDepositMarkers(iBuilder);
96
97    auto swizzle = this->generateSwizzleExtractData(iBuilder);
98
99    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
100    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
101
102    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
103    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
104
105    // Produce unswizzled bit streams
106    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
107    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
108    pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
109
110//    pxDriver.makeKernelCall(unSwizzleK, {u16Swizzle0, u16Swizzle1}, {extractedbits});
111
112    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
113    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
114
115    // --------------------------------------------------------
116    // End
117    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
118    outK->setInitialArguments({iBuilder->GetString(outputFile)});
119    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
120
121    pxDriver.generatePipelineIR();
122    pxDriver.deallocateBuffers();
123
124    iBuilder->CreateRetVoid();
125
126    pxDriver.finalizeObject();
127}
128
129void LZ4Generator::generatePipeline(const std::string& outputFile) {
130    auto & iBuilder = pxDriver.getBuilder();
131    this->generateMainFunc(iBuilder);
132
133    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
134
135    // GeneratePipeline
136    this->generateLoadByteStreamAndBitStream(iBuilder);
137    this->generateExtractAndDepositMarkers(iBuilder);
138
139    auto swizzle = this->generateSwizzleExtractData(iBuilder);
140
141    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
142    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
143
144    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
145    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
146
147
148    StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
149    StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
150
151    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
152    swizzledMatchCopyK->setInitialArguments({fileSize});
153    pxDriver.makeKernelCall(swizzledMatchCopyK, {M0_Start, M0_End, Match_Offset, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
154
155
156    // Produce unswizzled bit streams
157    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
158    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
159    pxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
160
161
162    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
163    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
164
165    // --------------------------------------------------------
166    // End
167    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
168    outK->setInitialArguments({iBuilder->GetString(outputFile)});
169    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
170
171    pxDriver.generatePipelineIR();
172    pxDriver.deallocateBuffers();
173
174    iBuilder->CreateRetVoid();
175
176    pxDriver.finalizeObject();
177}
178
179void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
180    Module * M = iBuilder->getModule();
181    Type * const sizeTy = iBuilder->getSizeTy();
182    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
183    Type * const voidTy = iBuilder->getVoidTy();
184    Type * const inputType = iBuilder->getInt8PtrTy();
185
186    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
187    main->setCallingConv(CallingConv::C);
188    Function::arg_iterator args = main->arg_begin();
189    inputStream = &*(args++);
190    inputStream->setName("input");
191
192    headerSize = &*(args++);
193    headerSize->setName("headerSize");
194
195    fileSize = &*(args++);
196    fileSize->setName("fileSize");
197
198    hasBlockChecksum = &*(args++);
199    hasBlockChecksum->setName("hasBlockChecksum");
200
201    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
202}
203
204void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
205    ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
206    BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
207
208    kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder, iBuilder->getInt8PtrTy());
209    sourceK->setInitialArguments({inputStream, fileSize});
210    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
211    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
212    s2pk->addAttribute(MustConsumeAll());
213    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
214}
215
216void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
217    //// Decode Block Information
218    StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks());
219    StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
220    StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
221
222    //// Generate Helper Markers Extenders, FX, XF
223    StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
224
225    Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
226    extenderK->addAttribute(MustConsumeAll());
227    pxDriver.makeKernelCall(extenderK, {BasisBits}, {Extenders});
228
229
230    Kernel * blockDecoderK = pxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
231    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty()), headerSize, fileSize});
232    pxDriver.makeKernelCall(blockDecoderK, {ByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
233
234    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
235
236    M0_Start = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
237    M0_End = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
238
239    //TODO handle uncompressed part
240    StreamSetBuffer * const UncompressedStartPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
241    StreamSetBuffer * const UncompressedLength = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
242    StreamSetBuffer * const UncompressedOutputPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
243
244    DeletionMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
245    StreamSetBuffer * const M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
246    DepositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
247    Match_Offset = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
248
249    Kernel* Lz4IndexBuilderK = pxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
250    Lz4IndexBuilderK->setInitialArguments({fileSize});
251    pxDriver.makeKernelCall(
252            Lz4IndexBuilderK,
253            {
254                    ByteStream,
255                    Extenders,
256
257                    // Block Data
258                    BlockData_IsCompressed,
259                    BlockData_BlockStart,
260                    BlockData_BlockEnd
261            }, {
262                    //Uncompressed Data
263                    UncompressedStartPos,
264                    UncompressedLength,
265                    UncompressedOutputPos,
266
267                    DeletionMarker,
268                    M0_Start,
269                    M0_End,
270                    Match_Offset,
271                    M0Marker
272            });
273
274    Kernel * generateDepositK = pxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
275    pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker});
276
277}
278
279std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
280    StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
281    StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<SwizzledCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
282
283    Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 64, 8);
284    pxDriver.makeKernelCall(delK, {DeletionMarker, BasisBits}, {u16Swizzle0, u16Swizzle1});
285    return std::make_pair(u16Swizzle0, u16Swizzle1);
286}
287
288int LZ4Generator::get4MbBufferBlocks() {
289    return 4 * 1024 * 1024 / codegen::BlockSize;
290}
291
292int LZ4Generator::getInputBufferBlocks() {
293    return this->get4MbBufferBlocks() * 2;
294}
295int LZ4Generator::getDecompressedBufferBlocks() {
296    return this->get4MbBufferBlocks() * 2;
297}
298
299
300
301
302// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.