source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6059

Last change on this file since 6059 was 6059, checked in by xwa163, 10 months ago
  1. Enable swizzled match copy in multiplexing lz4_grep for some special case
  2. Implement some lz4 AIO (all-in-one) pipeline and related kernel
File size: 30.0 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_block_decoder.h>
24#include <kernels/lz4/lz4_index_builder.h>
25#include <kernels/lz4/lz4_index_builder_new.h>
26#include <kernels/lz4/lz4_bytestream_aio.h>
27#include <kernels/lz4/lz4_swizzled_aio.h>
28#include <kernels/bitstream_pdep_kernel.h>
29#include <kernels/lz4/lz4_bitstream_not_kernel.h>
30
31namespace re { class CC; }
32
33using namespace llvm;
34using namespace parabix;
35using namespace kernel;
36
37LZ4Generator::LZ4Generator():mPxDriver("lz4d") {
38    mCompressionMarker = NULL;
39}
40
41MainFunctionType LZ4Generator::getMainFunc() {
42    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
43}
44
45void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
46    auto & iBuilder = mPxDriver.getBuilder();
47    this->generateMainFunc(iBuilder);
48
49    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
50
51    // GeneratePipeline
52    this->generateLoadByteStreamAndBitStream(iBuilder);
53
54    //// Decode Block Information
55    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
56    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
57    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
58
59    //// Generate Helper Markers Extenders, FX, XF
60    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
61    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
62    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
63    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
64
65
66    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
67    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
68    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
69
70    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
71    //TODO handle uncompressed part
72    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
73    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
74    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
75
76    mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
77    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
78    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
79
80    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderNewKernel>(iBuilder);
81    Lz4IndexBuilderK->setInitialArguments({mFileSize});
82    mPxDriver.makeKernelCall(
83            Lz4IndexBuilderK,
84            {
85                    mCompressedByteStream,
86                    Extenders,
87
88                    // Block Data
89                    BlockData_IsCompressed,
90                    BlockData_BlockStart,
91                    BlockData_BlockEnd
92            }, {
93                    //Uncompressed Data
94                    UncompressedStartPos,
95                    UncompressedLength,
96                    UncompressedOutputPos,
97
98                    mCompressionMarker,
99                    mM0Marker,
100                    mMatchOffsetMarker
101            });
102
103    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
104    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
105
106
107    // Deletion
108    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
109    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks());
110
111    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
112    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
113
114    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
115    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
116    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
117
118
119    StreamSetBuffer * const extractedBits = compressedBits;
120
121    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
122    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
123
124    // --------------------------------------------------------
125    // End
126    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
127
128    outK->setInitialArguments({iBuilder->GetString(outputFile)});
129    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
130
131    mPxDriver.generatePipelineIR();
132    mPxDriver.deallocateBuffers();
133
134    iBuilder->CreateRetVoid();
135
136    mPxDriver.finalizeObject();
137}
138
139void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
140    auto & iBuilder = mPxDriver.getBuilder();
141    this->generateMainFunc(iBuilder);
142
143    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
144
145    // GeneratePipeline
146    this->generateLoadByteStreamAndBitStream(iBuilder);
147    this->generateExtractAndDepositMarkers(iBuilder);
148    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
149
150    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
151    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
152
153    // --------------------------------------------------------
154    // End
155    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
156
157    outK->setInitialArguments({iBuilder->GetString(outputFile)});
158    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
159
160    mPxDriver.generatePipelineIR();
161    mPxDriver.deallocateBuffers();
162
163    iBuilder->CreateRetVoid();
164
165    mPxDriver.finalizeObject();
166}
167
168void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
169    auto & iBuilder = mPxDriver.getBuilder();
170    this->generateMainFunc(iBuilder);
171
172    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
173
174    // GeneratePipeline
175    this->generateLoadByteStreamAndBitStream(iBuilder);
176    this->generateExtractAndDepositMarkers(iBuilder);
177    auto swizzle = this->generateSwizzleExtractData(iBuilder);
178
179
180    // Produce unswizzled bit streams
181    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
182    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
183
184    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
185
186
187    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
188    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
189
190    // --------------------------------------------------------
191    // End
192    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
193
194    outK->setInitialArguments({iBuilder->GetString(outputFile)});
195    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
196
197    mPxDriver.generatePipelineIR();
198    mPxDriver.deallocateBuffers();
199
200    iBuilder->CreateRetVoid();
201
202    mPxDriver.finalizeObject();
203}
204
205void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
206    auto & iBuilder = mPxDriver.getBuilder();
207    this->generateMainFunc(iBuilder);
208
209    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
210
211    // GeneratePipeline
212    this->generateLoadByteStreamAndBitStream(iBuilder);
213    this->generateExtractAndDepositMarkers(iBuilder);
214    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
215
216    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
217    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
218    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
219
220    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
221    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
222
223    // --------------------------------------------------------
224    // End
225    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
226
227    outK->setInitialArguments({iBuilder->GetString(outputFile)});
228    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
229
230    mPxDriver.generatePipelineIR();
231    mPxDriver.deallocateBuffers();
232
233    iBuilder->CreateRetVoid();
234
235    mPxDriver.finalizeObject();
236}
237
238void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
239    auto & iBuilder = mPxDriver.getBuilder();
240    this->generateMainFunc(iBuilder);
241
242    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
243
244    // GeneratePipeline
245    this->generateLoadByteStreamAndBitStream(iBuilder);
246    this->generateExtractAndDepositMarkers(iBuilder);
247
248    auto swizzle = this->generateSwizzleExtractData(iBuilder);
249
250    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
251    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
252
253    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
254    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
255
256    // Produce unswizzled bit streams
257    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
258    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
259    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
260
261    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
262    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
263
264    // --------------------------------------------------------
265    // End
266    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
267    outK->setInitialArguments({iBuilder->GetString(outputFile)});
268    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
269
270    mPxDriver.generatePipelineIR();
271    mPxDriver.deallocateBuffers();
272
273    iBuilder->CreateRetVoid();
274
275    mPxDriver.finalizeObject();
276}
277
278void LZ4Generator::generatePipeline(const std::string &outputFile) {
279    auto & iBuilder = mPxDriver.getBuilder();
280    this->generateMainFunc(iBuilder);
281
282    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
283
284    // GeneratePipeline
285    this->generateLoadByteStreamAndBitStream(iBuilder);
286    this->generateExtractAndDepositMarkers(iBuilder);
287    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
288
289    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
290    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
291    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
292
293    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
294    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
295    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
296
297    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
298    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
299
300    // --------------------------------------------------------
301    // End
302    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
303
304    outK->setInitialArguments({iBuilder->GetString(outputFile)});
305    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
306
307    mPxDriver.generatePipelineIR();
308    mPxDriver.deallocateBuffers();
309
310    iBuilder->CreateRetVoid();
311
312    mPxDriver.finalizeObject();
313}
314
315void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
316    auto & iBuilder = mPxDriver.getBuilder();
317    this->generateMainFunc(iBuilder);
318
319    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
320
321    // GeneratePipeline
322    this->generateLoadByteStreamAndBitStream(iBuilder);
323    this->generateExtractAndDepositMarkers(iBuilder);
324
325    auto swizzle = this->generateSwizzleExtractData(iBuilder);
326
327    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
328    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
329
330    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
331    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
332
333    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
334    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
335
336    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
337    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
338
339
340    // Produce unswizzled bit streams
341    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
342    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
343    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
344
345
346    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
347    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
348
349    // --------------------------------------------------------
350    // End
351    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
352    outK->setInitialArguments({iBuilder->GetString(outputFile)});
353    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
354
355    mPxDriver.generatePipelineIR();
356    mPxDriver.deallocateBuffers();
357
358    iBuilder->CreateRetVoid();
359
360    mPxDriver.finalizeObject();
361}
362
363void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
364    Module * M = iBuilder->getModule();
365    Type * const sizeTy = iBuilder->getSizeTy();
366    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
367    Type * const voidTy = iBuilder->getVoidTy();
368    Type * const inputType = iBuilder->getInt8PtrTy();
369
370    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
371    main->setCallingConv(CallingConv::C);
372    Function::arg_iterator args = main->arg_begin();
373    mInputStream = &*(args++);
374    mInputStream->setName("input");
375
376    mHeaderSize = &*(args++);
377    mHeaderSize->setName("mHeaderSize");
378
379    mFileSize = &*(args++);
380    mFileSize->setName("mFileSize");
381
382    mHasBlockChecksum = &*(args++);
383    mHasBlockChecksum->setName("mHasBlockChecksum");
384    // TODO for now, we do not handle blockCheckSum
385    mHasBlockChecksum = iBuilder->getInt1(false);
386
387    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
388}
389
390void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
391    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
392    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
393
394    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
395    sourceK->setInitialArguments({mInputStream, mFileSize});
396    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
397    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
398    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
399}
400
401StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
402    //// Decode Block Information
403    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
404    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
405    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
406
407    //// Generate Helper Markers Extenders, FX, XF
408    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
409    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
410    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
411    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
412
413
414    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
415    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
416    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
417
418
419    // Produce unswizzled bit streams
420    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
421    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
422    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
423    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
424
425
426
427    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
428    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
429
430
431    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
432    lz4AioK->setInitialArguments({mFileSize});
433    mPxDriver.makeKernelCall(
434            lz4AioK,
435            {
436                    mCompressedByteStream,
437                    Extenders,
438
439                    // Block Data
440                    BlockData_IsCompressed,
441                    BlockData_BlockStart,
442                    BlockData_BlockEnd,
443
444                    u16Swizzle0,
445                    u16Swizzle1
446            }, {
447                    decompressedSwizzled0,
448                    decompressedSwizzled1
449            });
450
451
452    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
453
454    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
455    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
456
457    return decompressionBitStream;
458}
459
460
461
462StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
463    //// Decode Block Information
464    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
465    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
466    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
467
468    //// Generate Helper Markers Extenders, FX, XF
469    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
470    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
471    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
472    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
473
474
475    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
476    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
477    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
478
479
480    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
481
482    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
483    lz4AioK->setInitialArguments({mFileSize});
484    mPxDriver.makeKernelCall(
485            lz4AioK,
486            {
487                    mCompressedByteStream,
488                    Extenders,
489
490                    // Block Data
491                    BlockData_IsCompressed,
492                    BlockData_BlockStart,
493                    BlockData_BlockEnd
494            }, {
495                    decompressionByteStream
496            });
497
498    return decompressionByteStream;
499}
500
501void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
502    //// Decode Block Information
503    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
504    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
505    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
506
507    //// Generate Helper Markers Extenders, FX, XF
508    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
509    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
510    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
511    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
512
513
514    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
515    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
516    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
517
518    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
519
520    //TODO handle uncompressed part
521    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
522    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
523    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
524
525    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
526    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
527    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
528
529    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
530    Lz4IndexBuilderK->setInitialArguments({mFileSize});
531    mPxDriver.makeKernelCall(
532            Lz4IndexBuilderK,
533            {
534                    mCompressedByteStream,
535                    Extenders,
536
537                    // Block Data
538                    BlockData_IsCompressed,
539                    BlockData_BlockStart,
540                    BlockData_BlockEnd
541            }, {
542                    //Uncompressed Data
543                    UncompressedStartPos,
544                    UncompressedLength,
545                    UncompressedOutputPos,
546
547                    mDeletionMarker,
548                    mM0Marker,
549                    mMatchOffsetMarker
550            });
551
552    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
553    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
554
555}
556
557std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
558    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
559    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
560
561    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
562    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
563    return std::make_pair(u16Swizzle0, u16Swizzle1);
564}
565
566void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
567    if (!mCompressionMarker) {
568        mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
569        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
570        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
571    }
572}
573
574parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
575    this->generateCompressionMarker(iBuilder);
576
577    // Deletion
578    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
579    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks());
580
581    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
582    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
583
584    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
585    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
586    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
587
588    return compressedBits;
589}
590
591int LZ4Generator::get4MbBufferBlocks() {
592    return 4 * 1024 * 1024 / codegen::BlockSize;
593}
594
595int LZ4Generator::getInputBufferBlocks() {
596    return this->get4MbBufferBlocks() * 2;
597}
598int LZ4Generator::getDecompressedBufferBlocks() {
599    return this->get4MbBufferBlocks() * 2;
600}
601
602
603
604
605// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.