source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6029

Last change on this file since 6029 was 6029, checked in by xwa163, 15 months ago

Init checkin for bitstream_pdep_kernel

File size: 15.2 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_block_decoder.h>
23#include <kernels/lz4/lz4_index_builder.h>
24#include <kernels/bitstream_pdep_kernel.h>
25
26namespace re { class CC; }
27
28using namespace llvm;
29using namespace parabix;
30using namespace kernel;
31
32LZ4Generator::LZ4Generator():mPxDriver("lz4d") {
33
34}
35
36MainFunctionType LZ4Generator::getMainFunc() {
37    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
38}
39
40
41
42void LZ4Generator::generateExtractOnlyPipeline(const std::string& outputFile) {
43    auto & iBuilder = mPxDriver.getBuilder();
44    this->generateMainFunc(iBuilder);
45
46    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
47
48    // GeneratePipeline
49    this->generateLoadByteStreamAndBitStream(iBuilder);
50
51
52    this->generateExtractAndDepositMarkers(iBuilder);
53
54
55    auto swizzle = this->generateSwizzleExtractData(iBuilder);
56
57
58    // Produce unswizzled bit streams
59    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
60    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
61
62    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
63
64
65    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
66    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
67
68    // --------------------------------------------------------
69    // End
70    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
71
72    outK->setInitialArguments({iBuilder->GetString(outputFile)});
73    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
74
75    mPxDriver.generatePipelineIR();
76    mPxDriver.deallocateBuffers();
77
78    iBuilder->CreateRetVoid();
79
80    mPxDriver.finalizeObject();
81}
82
83void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
84    auto & iBuilder = mPxDriver.getBuilder();
85    this->generateMainFunc(iBuilder);
86
87    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
88
89    // GeneratePipeline
90    this->generateLoadByteStreamAndBitStream(iBuilder);
91    this->generateExtractAndDepositMarkers(iBuilder);
92
93    auto swizzle = this->generateSwizzleExtractData(iBuilder); // TODO: use compression kernel instead
94
95    StreamSetBuffer * extractedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
96    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
97    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedBits});
98
99    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
100    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
101    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
102
103    /*
104
105    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
106    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
107
108    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
109    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
110
111    // Produce unswizzled bit streams
112    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
113    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
114    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
115     */
116
117    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
118    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
119
120    // --------------------------------------------------------
121    // End
122    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
123    outK->setInitialArguments({iBuilder->GetString(outputFile)});
124    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
125
126    mPxDriver.generatePipelineIR();
127    mPxDriver.deallocateBuffers();
128
129    iBuilder->CreateRetVoid();
130
131    mPxDriver.finalizeObject();
132}
133
134void LZ4Generator::generatePipeline(const std::string& outputFile) {
135    auto & iBuilder = mPxDriver.getBuilder();
136    this->generateMainFunc(iBuilder);
137
138    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
139
140    // GeneratePipeline
141    this->generateLoadByteStreamAndBitStream(iBuilder);
142    this->generateExtractAndDepositMarkers(iBuilder);
143
144    auto swizzle = this->generateSwizzleExtractData(iBuilder);
145
146    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
147    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
148
149    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
150    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
151
152    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
153    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
154
155    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
156    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
157
158
159    // Produce unswizzled bit streams
160    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
161    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
162    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
163
164
165    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
166    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
167
168    // --------------------------------------------------------
169    // End
170    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
171    outK->setInitialArguments({iBuilder->GetString(outputFile)});
172    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
173
174    mPxDriver.generatePipelineIR();
175    mPxDriver.deallocateBuffers();
176
177    iBuilder->CreateRetVoid();
178
179    mPxDriver.finalizeObject();
180}
181
182void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
183    Module * M = iBuilder->getModule();
184    Type * const sizeTy = iBuilder->getSizeTy();
185    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
186    Type * const voidTy = iBuilder->getVoidTy();
187    Type * const inputType = iBuilder->getInt8PtrTy();
188
189    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
190    main->setCallingConv(CallingConv::C);
191    Function::arg_iterator args = main->arg_begin();
192    mInputStream = &*(args++);
193    mInputStream->setName("input");
194
195    mHeaderSize = &*(args++);
196    mHeaderSize->setName("mHeaderSize");
197
198    mFileSize = &*(args++);
199    mFileSize->setName("mFileSize");
200
201    mHasBlockChecksum = &*(args++);
202    mHasBlockChecksum->setName("mHasBlockChecksum");
203    // TODO for now, we do not handle blockCheckSum
204    mHasBlockChecksum = iBuilder->getInt1(false);
205
206    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
207}
208
209void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
210    mCompressedByteStream = mPxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
211    mCompressedBasisBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
212
213    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
214    sourceK->setInitialArguments({mInputStream, mFileSize});
215    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
216    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
217//    s2pk->addAttribute(MustConsumeAll());
218    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
219}
220
221void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
222    //// Decode Block Information
223    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
224    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
225    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
226
227    //// Generate Helper Markers Extenders, FX, XF
228    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
229    mMatchOffsetMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
230        // FX and XF streams will be added to IndexBuilderKernel in the future
231//    StreamSetBuffer * const CC_0xFX = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
232//    StreamSetBuffer * const CC_0xXF = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
233
234    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
235//    extenderK->addAttribute(MustConsumeAll());
236    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
237
238
239    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
240    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
241    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
242
243//    re::CC* xfCC = re::makeCC(0x0f);
244//    re::CC* fxCC = re::makeCC(0xf0);
245//    for (re::codepoint_t i = 1; i <= 0xf; i++) {
246//        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
247//        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
248//    }
249
250//    Kernel * CC_0xFXKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
251//    mPxDriver.makeKernelCall(CC_0xFXKernel, {mCompressedBasisBits}, {CC_0xFX});
252
253//    Kernel * CC_0xXFKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
254//    mPxDriver.makeKernelCall(CC_0xXFKernel, {mCompressedBasisBits}, {CC_0xXF});
255
256    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
257
258    //TODO handle uncompressed part
259    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
260    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
261    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
262
263    mDeletionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
264    mM0Marker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
265    mDepositMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
266
267    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
268    Lz4IndexBuilderK->setInitialArguments({mFileSize});
269    mPxDriver.makeKernelCall(
270            Lz4IndexBuilderK,
271            {
272                    mCompressedByteStream,
273                    Extenders,
274//                    CC_0xFX,
275//                    CC_0xXF,
276
277                    // Block Data
278                    BlockData_IsCompressed,
279                    BlockData_BlockStart,
280                    BlockData_BlockEnd
281            }, {
282                    //Uncompressed Data
283                    UncompressedStartPos,
284                    UncompressedLength,
285                    UncompressedOutputPos,
286
287                    mDeletionMarker,
288                    mM0Marker,
289                    mMatchOffsetMarker
290            });
291
292    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
293    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
294
295}
296
297std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
298    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
299    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
300
301    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
302    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
303    return std::make_pair(u16Swizzle0, u16Swizzle1);
304}
305
306int LZ4Generator::get4MbBufferBlocks() {
307    return 4 * 1024 * 1024 / codegen::BlockSize;
308}
309
310int LZ4Generator::getInputBufferBlocks() {
311    return this->get4MbBufferBlocks() * 2;
312}
313int LZ4Generator::getDecompressedBufferBlocks() {
314    return this->get4MbBufferBlocks() * 2;
315}
316
317
318
319
320// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.