source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6111

Last change on this file since 6111 was 6111, checked in by xwa163, 9 months ago
  1. Cleanup LZ4 AIO related kernels
  2. Improve LZ4ParallelByteStreamAIOKernel
  3. Implement simd_cttz
File size: 33.6 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_block_decoder.h>
24#include <kernels/lz4/lz4_index_builder.h>
25#include <kernels/lz4/aio/lz4_bytestream_aio.h>
26#include <kernels/lz4/aio/lz4_parallel_bytestream_aio.h>
27#include <kernels/lz4/aio/lz4_swizzled_aio.h>
28#include <kernels/bitstream_pdep_kernel.h>
29#include <kernels/lz4/lz4_bitstream_not_kernel.h>
30
31namespace re { class CC; }
32
33using namespace llvm;
34using namespace parabix;
35using namespace kernel;
36
37LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) {
38    mCompressionMarker = NULL;
39}
40
41MainFunctionType LZ4Generator::getMainFunc() {
42    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
43}
44
45void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
46    auto & iBuilder = mPxDriver.getBuilder();
47    this->generateMainFunc(iBuilder);
48
49    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
50
51    // GeneratePipeline
52    this->generateLoadByteStreamAndBitStream(iBuilder);
53
54    //// Decode Block Information
55    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
56    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
57    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
58
59    //// Generate Helper Markers Extenders, FX, XF
60    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
61    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
62    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
63    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
64
65
66    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
67    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
68    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
69
70    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
71    //TODO handle uncompressed part
72    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
73    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
74    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
75
76    mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
77    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
78    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
79
80    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
81    Lz4IndexBuilderK->setInitialArguments({mFileSize});
82    mPxDriver.makeKernelCall(
83            Lz4IndexBuilderK,
84            {
85                    mCompressedByteStream,
86                    Extenders,
87
88                    // Block Data
89                    BlockData_IsCompressed,
90                    BlockData_BlockStart,
91                    BlockData_BlockEnd
92            }, {
93                    //Uncompressed Data
94                    UncompressedStartPos,
95                    UncompressedLength,
96                    UncompressedOutputPos,
97
98                    mCompressionMarker,
99                    mM0Marker,
100                    mMatchOffsetMarker
101            });
102
103    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
104    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
105
106
107    // Deletion
108    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
109    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
110
111    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
112    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
113
114    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
115    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
116    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
117
118
119    StreamSetBuffer * const extractedBits = compressedBits;
120
121    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
122    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
123
124    // --------------------------------------------------------
125    // End
126    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
127
128    outK->setInitialArguments({iBuilder->GetString(outputFile)});
129    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
130
131    mPxDriver.generatePipelineIR();
132    mPxDriver.deallocateBuffers();
133
134    iBuilder->CreateRetVoid();
135
136    mPxDriver.finalizeObject();
137}
138
139void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
140    auto & iBuilder = mPxDriver.getBuilder();
141    this->generateMainFunc(iBuilder);
142
143    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
144
145    // GeneratePipeline
146    this->generateLoadByteStreamAndBitStream(iBuilder);
147    this->generateExtractAndDepositMarkers(iBuilder);
148    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
149
150    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
151    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
152
153    // --------------------------------------------------------
154    // End
155    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
156
157    outK->setInitialArguments({iBuilder->GetString(outputFile)});
158    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
159
160    mPxDriver.generatePipelineIR();
161    mPxDriver.deallocateBuffers();
162
163    iBuilder->CreateRetVoid();
164
165    mPxDriver.finalizeObject();
166}
167
168void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
169    auto & iBuilder = mPxDriver.getBuilder();
170    this->generateMainFunc(iBuilder);
171
172    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
173
174    // GeneratePipeline
175    this->generateLoadByteStreamAndBitStream(iBuilder);
176    this->generateExtractAndDepositMarkers(iBuilder);
177    auto swizzle = this->generateSwizzleExtractData(iBuilder);
178
179
180    // Produce unswizzled bit streams
181    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
182    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
183
184    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
185
186
187    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
188    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
189
190    // --------------------------------------------------------
191    // End
192    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
193
194    outK->setInitialArguments({iBuilder->GetString(outputFile)});
195    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
196
197    mPxDriver.generatePipelineIR();
198    mPxDriver.deallocateBuffers();
199
200    iBuilder->CreateRetVoid();
201
202    mPxDriver.finalizeObject();
203}
204
205void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
206    auto & iBuilder = mPxDriver.getBuilder();
207    this->generateMainFunc(iBuilder);
208
209    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
210
211    // GeneratePipeline
212    this->generateLoadByteStreamAndBitStream(iBuilder);
213    this->generateExtractAndDepositMarkers(iBuilder);
214    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
215
216    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
217    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
218    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
219
220    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
221    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
222
223    // --------------------------------------------------------
224    // End
225    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
226
227    outK->setInitialArguments({iBuilder->GetString(outputFile)});
228    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
229
230    mPxDriver.generatePipelineIR();
231    mPxDriver.deallocateBuffers();
232
233    iBuilder->CreateRetVoid();
234
235    mPxDriver.finalizeObject();
236}
237
238void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
239    auto & iBuilder = mPxDriver.getBuilder();
240    this->generateMainFunc(iBuilder);
241
242    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
243
244    // GeneratePipeline
245    this->generateLoadByteStreamAndBitStream(iBuilder);
246    this->generateExtractAndDepositMarkers(iBuilder);
247
248    auto swizzle = this->generateSwizzleExtractData(iBuilder);
249
250    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
251    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
252
253    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
254    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
255
256    // Produce unswizzled bit streams
257    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
258    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
259    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
260
261    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
262    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
263
264    // --------------------------------------------------------
265    // End
266    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
267    outK->setInitialArguments({iBuilder->GetString(outputFile)});
268    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
269
270    mPxDriver.generatePipelineIR();
271    mPxDriver.deallocateBuffers();
272
273    iBuilder->CreateRetVoid();
274
275    mPxDriver.finalizeObject();
276}
277
278void LZ4Generator::generatePipeline(const std::string &outputFile) {
279    auto & iBuilder = mPxDriver.getBuilder();
280    this->generateMainFunc(iBuilder);
281
282    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
283
284    // GeneratePipeline
285    this->generateLoadByteStreamAndBitStream(iBuilder);
286    this->generateExtractAndDepositMarkers(iBuilder);
287    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
288
289    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
290    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
291    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
292
293    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
294    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
295    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
296
297    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
298    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
299
300    // --------------------------------------------------------
301    // End
302    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
303
304    outK->setInitialArguments({iBuilder->GetString(outputFile)});
305    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
306
307    mPxDriver.generatePipelineIR();
308    mPxDriver.deallocateBuffers();
309
310    iBuilder->CreateRetVoid();
311
312    mPxDriver.finalizeObject();
313}
314
315void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
316    auto & iBuilder = mPxDriver.getBuilder();
317    this->generateMainFunc(iBuilder);
318
319    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
320
321    // GeneratePipeline
322    this->generateLoadByteStreamAndBitStream(iBuilder);
323    this->generateExtractAndDepositMarkers(iBuilder);
324
325    auto swizzle = this->generateSwizzleExtractData(iBuilder);
326
327    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
328    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
329
330    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
331    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
332
333    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
334    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
335
336    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
337    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
338
339
340    // Produce unswizzled bit streams
341    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
342    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
343    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
344
345
346    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
347    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
348
349    // --------------------------------------------------------
350    // End
351    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
352    outK->setInitialArguments({iBuilder->GetString(outputFile)});
353    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
354
355    mPxDriver.generatePipelineIR();
356    mPxDriver.deallocateBuffers();
357
358    iBuilder->CreateRetVoid();
359
360    mPxDriver.finalizeObject();
361}
362
363void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
364    Module * M = iBuilder->getModule();
365    Type * const sizeTy = iBuilder->getSizeTy();
366    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
367    Type * const voidTy = iBuilder->getVoidTy();
368    Type * const inputType = iBuilder->getInt8PtrTy();
369
370    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
371    main->setCallingConv(CallingConv::C);
372    Function::arg_iterator args = main->arg_begin();
373    mInputStream = &*(args++);
374    mInputStream->setName("input");
375
376    mHeaderSize = &*(args++);
377    mHeaderSize->setName("mHeaderSize");
378
379    mFileSize = &*(args++);
380    mFileSize->setName("mFileSize");
381
382    mHasBlockChecksum = &*(args++);
383    mHasBlockChecksum->setName("mHasBlockChecksum");
384    // TODO for now, we do not handle blockCheckSum
385    mHasBlockChecksum = iBuilder->getInt1(false);
386
387    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
388}
389
390void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
391    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
392    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
393    sourceK->setInitialArguments({mInputStream, mFileSize});
394    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
395}
396void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
397    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
398    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder));
399
400    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
401    sourceK->setInitialArguments({mInputStream, mFileSize});
402    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
403    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::LittleEndian);
404    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
405}
406
407StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
408    //// Decode Block Information
409    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
410    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
411    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
412
413    //// Generate Helper Markers Extenders, FX, XF
414//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
415//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
416//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
417//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
418
419
420    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
421    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
422    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
423
424
425    // Produce unswizzled bit streams
426    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
427    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
428    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
429    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
430
431
432
433    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
434    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
435
436
437    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
438    lz4AioK->setInitialArguments({mFileSize});
439    mPxDriver.makeKernelCall(
440            lz4AioK,
441            {
442                    mCompressedByteStream,
443
444//                    Extenders,
445
446                    // Block Data
447                    BlockData_IsCompressed,
448                    BlockData_BlockStart,
449                    BlockData_BlockEnd,
450
451                    u16Swizzle0,
452                    u16Swizzle1
453            }, {
454                    decompressedSwizzled0,
455                    decompressedSwizzled1
456            });
457
458
459    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
460
461    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
462    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
463
464    return decompressionBitStream;
465}
466
467parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) {
468    //// Decode Block Information
469    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
470    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
471    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
472
473    //// Generate Helper Markers Extenders
474//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
475//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
476//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
477//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
478
479    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
480    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
481    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
482
483
484    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
485
486    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
487    lz4AioK->setInitialArguments({mFileSize});
488    mPxDriver.makeKernelCall(
489            lz4AioK,
490            {
491                    mCompressedByteStream,
492
493//                    Extenders,
494
495                    // Block Data
496                    BlockData_IsCompressed,
497                    BlockData_BlockStart,
498                    BlockData_BlockEnd
499            }, {
500                    decompressionByteStream
501            });
502
503    return decompressionByteStream;
504
505}
506
507StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
508    //// Decode Block Information
509    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
510    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
511    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
512
513
514    //// Generate Helper Markers Extenders
515//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
516//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
517//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
518//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
519
520
521    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
522    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
523    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
524
525
526    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
527
528    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
529    lz4AioK->setInitialArguments({mFileSize});
530    mPxDriver.makeKernelCall(
531            lz4AioK,
532            {
533                    mCompressedByteStream,
534//                    Extenders,
535
536                    // Block Data
537                    BlockData_IsCompressed,
538                    BlockData_BlockStart,
539                    BlockData_BlockEnd
540            }, {
541                    decompressionByteStream
542            });
543
544    return decompressionByteStream;
545}
546
547void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
548    //// Decode Block Information
549    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
550    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
551    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
552
553    //// Generate Helper Markers Extenders, FX, XF
554    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
555    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
556    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
557    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
558
559
560    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
561    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
562    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
563
564    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
565
566    //TODO handle uncompressed part
567    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
568    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
569    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
570
571    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
572    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
573    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
574
575    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
576    Lz4IndexBuilderK->setInitialArguments({mFileSize});
577    mPxDriver.makeKernelCall(
578            Lz4IndexBuilderK,
579            {
580                    mCompressedByteStream,
581                    Extenders,
582
583                    // Block Data
584                    BlockData_IsCompressed,
585                    BlockData_BlockStart,
586                    BlockData_BlockEnd
587            }, {
588                    //Uncompressed Data
589                    UncompressedStartPos,
590                    UncompressedLength,
591                    UncompressedOutputPos,
592
593                    mDeletionMarker,
594                    mM0Marker,
595                    mMatchOffsetMarker
596            });
597
598    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
599    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
600
601}
602
603std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
604    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
605    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
606
607    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
608    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
609    return std::make_pair(u16Swizzle0, u16Swizzle1);
610}
611
612void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
613    if (!mCompressionMarker) {
614        mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
615        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
616        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
617    }
618}
619
620parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
621    this->generateCompressionMarker(iBuilder);
622
623    // Deletion
624    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
625    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
626
627    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
628    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
629
630    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
631    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
632    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
633
634    return compressedBits;
635}
636
637int LZ4Generator::get4MbBufferBlocks() {
638    return mLz4BlockSize / codegen::BlockSize;
639}
640
641int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
642    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
643}
644int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
645    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
646}
647
648
649
650
651// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.