source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6039

Last change on this file since 6039 was 6039, checked in by xwa163, 12 months ago

Init checkin for bitstream version of lz4d match copy

File size: 20.3 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_block_decoder.h>
24#include <kernels/lz4/lz4_index_builder.h>
25#include <kernels/bitstream_pdep_kernel.h>
26#include <kernels/lz4/lz4_bitstream_not_kernel.h>
27
28namespace re { class CC; }
29
30using namespace llvm;
31using namespace parabix;
32using namespace kernel;
33
34LZ4Generator::LZ4Generator():mPxDriver("lz4d") {
35
36}
37
38MainFunctionType LZ4Generator::getMainFunc() {
39    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
40}
41
42void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
43    auto & iBuilder = mPxDriver.getBuilder();
44    this->generateMainFunc(iBuilder);
45
46    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
47
48    // GeneratePipeline
49    this->generateLoadByteStreamAndBitStream(iBuilder);
50    this->generateExtractAndDepositMarkers(iBuilder);
51    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
52
53    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
54    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
55
56    // --------------------------------------------------------
57    // End
58    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
59
60    outK->setInitialArguments({iBuilder->GetString(outputFile)});
61    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
62
63    mPxDriver.generatePipelineIR();
64    mPxDriver.deallocateBuffers();
65
66    iBuilder->CreateRetVoid();
67
68    mPxDriver.finalizeObject();
69}
70
71void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
72    auto & iBuilder = mPxDriver.getBuilder();
73    this->generateMainFunc(iBuilder);
74
75    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
76
77    // GeneratePipeline
78    this->generateLoadByteStreamAndBitStream(iBuilder);
79    this->generateExtractAndDepositMarkers(iBuilder);
80    auto swizzle = this->generateSwizzleExtractData(iBuilder);
81
82
83    // Produce unswizzled bit streams
84    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
85    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
86
87    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
88
89
90    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
91    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
92
93    // --------------------------------------------------------
94    // End
95    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
96
97    outK->setInitialArguments({iBuilder->GetString(outputFile)});
98    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
99
100    mPxDriver.generatePipelineIR();
101    mPxDriver.deallocateBuffers();
102
103    iBuilder->CreateRetVoid();
104
105    mPxDriver.finalizeObject();
106}
107
108void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
109    auto & iBuilder = mPxDriver.getBuilder();
110    this->generateMainFunc(iBuilder);
111
112    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
113
114    // GeneratePipeline
115    this->generateLoadByteStreamAndBitStream(iBuilder);
116    this->generateExtractAndDepositMarkers(iBuilder);
117    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
118
119    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
120    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
121    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
122
123    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
124    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
125
126    // --------------------------------------------------------
127    // End
128    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
129
130    outK->setInitialArguments({iBuilder->GetString(outputFile)});
131    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
132
133    mPxDriver.generatePipelineIR();
134    mPxDriver.deallocateBuffers();
135
136    iBuilder->CreateRetVoid();
137
138    mPxDriver.finalizeObject();
139}
140
141void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
142    auto & iBuilder = mPxDriver.getBuilder();
143    this->generateMainFunc(iBuilder);
144
145    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
146
147    // GeneratePipeline
148    this->generateLoadByteStreamAndBitStream(iBuilder);
149    this->generateExtractAndDepositMarkers(iBuilder);
150
151    auto swizzle = this->generateSwizzleExtractData(iBuilder);
152
153    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
154    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
155
156    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
157    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
158
159    // Produce unswizzled bit streams
160    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
161    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
162    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
163
164    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
165    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
166
167    // --------------------------------------------------------
168    // End
169    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
170    outK->setInitialArguments({iBuilder->GetString(outputFile)});
171    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
172
173    mPxDriver.generatePipelineIR();
174    mPxDriver.deallocateBuffers();
175
176    iBuilder->CreateRetVoid();
177
178    mPxDriver.finalizeObject();
179}
180
181void LZ4Generator::generatePipeline(const std::string &outputFile) {
182    auto & iBuilder = mPxDriver.getBuilder();
183    this->generateMainFunc(iBuilder);
184
185    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
186
187    // GeneratePipeline
188    this->generateLoadByteStreamAndBitStream(iBuilder);
189    this->generateExtractAndDepositMarkers(iBuilder);
190    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
191
192    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
193    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
194    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
195
196    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
197    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
198    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
199
200    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
201    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
202
203    // --------------------------------------------------------
204    // End
205    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
206
207    outK->setInitialArguments({iBuilder->GetString(outputFile)});
208    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
209
210    mPxDriver.generatePipelineIR();
211    mPxDriver.deallocateBuffers();
212
213    iBuilder->CreateRetVoid();
214
215    mPxDriver.finalizeObject();
216}
217
218void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
219    auto & iBuilder = mPxDriver.getBuilder();
220    this->generateMainFunc(iBuilder);
221
222    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
223
224    // GeneratePipeline
225    this->generateLoadByteStreamAndBitStream(iBuilder);
226    this->generateExtractAndDepositMarkers(iBuilder);
227
228    auto swizzle = this->generateSwizzleExtractData(iBuilder);
229
230    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
231    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
232
233    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
234    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
235
236    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
237    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
238
239    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
240    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
241
242
243    // Produce unswizzled bit streams
244    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
245    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
246    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
247
248
249    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
250    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
251
252    // --------------------------------------------------------
253    // End
254    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
255    outK->setInitialArguments({iBuilder->GetString(outputFile)});
256    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
257
258    mPxDriver.generatePipelineIR();
259    mPxDriver.deallocateBuffers();
260
261    iBuilder->CreateRetVoid();
262
263    mPxDriver.finalizeObject();
264}
265
266void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
267    Module * M = iBuilder->getModule();
268    Type * const sizeTy = iBuilder->getSizeTy();
269    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
270    Type * const voidTy = iBuilder->getVoidTy();
271    Type * const inputType = iBuilder->getInt8PtrTy();
272
273    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
274    main->setCallingConv(CallingConv::C);
275    Function::arg_iterator args = main->arg_begin();
276    mInputStream = &*(args++);
277    mInputStream->setName("input");
278
279    mHeaderSize = &*(args++);
280    mHeaderSize->setName("mHeaderSize");
281
282    mFileSize = &*(args++);
283    mFileSize->setName("mFileSize");
284
285    mHasBlockChecksum = &*(args++);
286    mHasBlockChecksum->setName("mHasBlockChecksum");
287    // TODO for now, we do not handle blockCheckSum
288    mHasBlockChecksum = iBuilder->getInt1(false);
289
290    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
291}
292
293void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
294    mCompressedByteStream = mPxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
295    mCompressedBasisBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
296
297    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
298    sourceK->setInitialArguments({mInputStream, mFileSize});
299    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
300    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
301//    s2pk->addAttribute(MustConsumeAll());
302    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
303}
304
305void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
306    //// Decode Block Information
307    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
308    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
309    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
310
311    //// Generate Helper Markers Extenders, FX, XF
312    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
313    mMatchOffsetMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
314        // FX and XF streams will be added to IndexBuilderKernel in the future
315//    StreamSetBuffer * const CC_0xFX = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
316//    StreamSetBuffer * const CC_0xXF = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
317
318    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
319//    extenderK->addAttribute(MustConsumeAll());
320    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
321
322
323    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
324    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
325    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
326
327//    re::CC* xfCC = re::makeCC(0x0f);
328//    re::CC* fxCC = re::makeCC(0xf0);
329//    for (re::codepoint_t i = 1; i <= 0xf; i++) {
330//        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
331//        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
332//    }
333
334//    Kernel * CC_0xFXKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
335//    mPxDriver.makeKernelCall(CC_0xFXKernel, {mCompressedBasisBits}, {CC_0xFX});
336
337//    Kernel * CC_0xXFKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
338//    mPxDriver.makeKernelCall(CC_0xXFKernel, {mCompressedBasisBits}, {CC_0xXF});
339
340    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
341
342    //TODO handle uncompressed part
343    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
344    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
345    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
346
347    mDeletionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
348    mM0Marker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
349    mDepositMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
350
351    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
352    Lz4IndexBuilderK->setInitialArguments({mFileSize});
353    mPxDriver.makeKernelCall(
354            Lz4IndexBuilderK,
355            {
356                    mCompressedByteStream,
357                    Extenders,
358//                    CC_0xFX,
359//                    CC_0xXF,
360
361                    // Block Data
362                    BlockData_IsCompressed,
363                    BlockData_BlockStart,
364                    BlockData_BlockEnd
365            }, {
366                    //Uncompressed Data
367                    UncompressedStartPos,
368                    UncompressedLength,
369                    UncompressedOutputPos,
370
371                    mDeletionMarker,
372                    mM0Marker,
373                    mMatchOffsetMarker
374            });
375
376    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
377    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
378
379}
380
381std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
382    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
383    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
384
385    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
386    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
387    return std::make_pair(u16Swizzle0, u16Swizzle1);
388}
389
390parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
391    StreamSetBuffer * const compressionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
392    Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
393    mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {compressionMarker});
394
395    // Deletion
396    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
397    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
398
399    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
400    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, compressionMarker}, {deletedBits, deletionCounts});
401
402    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
403    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
404    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
405
406    return compressedBits;
407}
408
409int LZ4Generator::get4MbBufferBlocks() {
410    return 4 * 1024 * 1024 / codegen::BlockSize;
411}
412
413int LZ4Generator::getInputBufferBlocks() {
414    return this->get4MbBufferBlocks() * 2;
415}
416int LZ4Generator::getDecompressedBufferBlocks() {
417    return this->get4MbBufferBlocks() * 2;
418}
419
420
421
422
423// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.