source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6044

Last change on this file since 6044 was 6044, checked in by xwa163, 11 months ago
  1. Fix a bug of multiplexing lz4_grep in release build
  2. Combine compressed-space LineBreakStream? and CharClassStream? before doing PDEP and match copy in order to improve the performance
File size: 20.5 KB
RevLine 
[5864]1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
[6026]20#include <kernels/swizzled_multiple_pdep_kernel.h>
[5906]21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
[6039]22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
[5948]23#include <kernels/lz4/lz4_block_decoder.h>
[5921]24#include <kernels/lz4/lz4_index_builder.h>
[6029]25#include <kernels/bitstream_pdep_kernel.h>
[6039]26#include <kernels/lz4/lz4_bitstream_not_kernel.h>
[5864]27
28namespace re { class CC; }
29
30using namespace llvm;
31using namespace parabix;
32using namespace kernel;
33
[6026]34LZ4Generator::LZ4Generator():mPxDriver("lz4d") {
[6044]35    mCompressionMarker = NULL;
[5864]36}
37
38MainFunctionType LZ4Generator::getMainFunc() {
[6026]39    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
[5864]40}
41
[6039]42void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
[6026]43    auto & iBuilder = mPxDriver.getBuilder();
[5864]44    this->generateMainFunc(iBuilder);
45
[6026]46    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
[5864]47
48    // GeneratePipeline
49    this->generateLoadByteStreamAndBitStream(iBuilder);
[6039]50    this->generateExtractAndDepositMarkers(iBuilder);
51    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5864]52
[6039]53    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
54    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
[5948]55
[6039]56    // --------------------------------------------------------
57    // End
58    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]59
[6039]60    outK->setInitialArguments({iBuilder->GetString(outputFile)});
61    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5948]62
[6039]63    mPxDriver.generatePipelineIR();
64    mPxDriver.deallocateBuffers();
65
66    iBuilder->CreateRetVoid();
67
68    mPxDriver.finalizeObject();
69}
70
71void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
72    auto & iBuilder = mPxDriver.getBuilder();
73    this->generateMainFunc(iBuilder);
74
75    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
76
77    // GeneratePipeline
78    this->generateLoadByteStreamAndBitStream(iBuilder);
79    this->generateExtractAndDepositMarkers(iBuilder);
[5864]80    auto swizzle = this->generateSwizzleExtractData(iBuilder);
81
82
83    // Produce unswizzled bit streams
[6026]84    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
85    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[5864]86
[6026]87    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
[5864]88
89
[6026]90    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
91    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
[5864]92
93    // --------------------------------------------------------
94    // End
[6026]95    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]96
97    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]98    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5864]99
[6026]100    mPxDriver.generatePipelineIR();
101    mPxDriver.deallocateBuffers();
[5864]102
103    iBuilder->CreateRetVoid();
104
[6026]105    mPxDriver.finalizeObject();
[5864]106}
107
[5874]108void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
[6026]109    auto & iBuilder = mPxDriver.getBuilder();
[5874]110    this->generateMainFunc(iBuilder);
111
[6026]112    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
[5874]113
114    // GeneratePipeline
115    this->generateLoadByteStreamAndBitStream(iBuilder);
116    this->generateExtractAndDepositMarkers(iBuilder);
[6039]117    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5874]118
[6029]119    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
120    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
121    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
122
[6039]123    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
124    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
[6029]125
[6039]126    // --------------------------------------------------------
127    // End
128    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
129
130    outK->setInitialArguments({iBuilder->GetString(outputFile)});
131    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
132
133    mPxDriver.generatePipelineIR();
134    mPxDriver.deallocateBuffers();
135
136    iBuilder->CreateRetVoid();
137
138    mPxDriver.finalizeObject();
139}
140
141void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
142    auto & iBuilder = mPxDriver.getBuilder();
143    this->generateMainFunc(iBuilder);
144
145    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
146
147    // GeneratePipeline
148    this->generateLoadByteStreamAndBitStream(iBuilder);
149    this->generateExtractAndDepositMarkers(iBuilder);
150
151    auto swizzle = this->generateSwizzleExtractData(iBuilder);
152
[6026]153    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
154    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
[5874]155
[6026]156    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
157    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
[5874]158
159    // Produce unswizzled bit streams
[6029]160    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
[6026]161    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[6029]162    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
[5874]163
[6026]164    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
[6029]165    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
[5874]166
167    // --------------------------------------------------------
168    // End
[6026]169    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5874]170    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]171    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5874]172
[6026]173    mPxDriver.generatePipelineIR();
174    mPxDriver.deallocateBuffers();
[5874]175
176    iBuilder->CreateRetVoid();
177
[6026]178    mPxDriver.finalizeObject();
[5874]179}
180
[6039]181void LZ4Generator::generatePipeline(const std::string &outputFile) {
[6026]182    auto & iBuilder = mPxDriver.getBuilder();
[5864]183    this->generateMainFunc(iBuilder);
184
[6026]185    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
[5864]186
187    // GeneratePipeline
188    this->generateLoadByteStreamAndBitStream(iBuilder);
189    this->generateExtractAndDepositMarkers(iBuilder);
[6039]190    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5864]191
[6039]192    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
193    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
194    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
195
196    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
197    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
198    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
199
200    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
201    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
202
203    // --------------------------------------------------------
204    // End
205    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
206
207    outK->setInitialArguments({iBuilder->GetString(outputFile)});
208    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
209
210    mPxDriver.generatePipelineIR();
211    mPxDriver.deallocateBuffers();
212
213    iBuilder->CreateRetVoid();
214
215    mPxDriver.finalizeObject();
216}
217
218void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
219    auto & iBuilder = mPxDriver.getBuilder();
220    this->generateMainFunc(iBuilder);
221
222    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
223
224    // GeneratePipeline
225    this->generateLoadByteStreamAndBitStream(iBuilder);
226    this->generateExtractAndDepositMarkers(iBuilder);
227
[5864]228    auto swizzle = this->generateSwizzleExtractData(iBuilder);
229
[6026]230    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
231    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
[5864]232
[6026]233    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
234    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
[5874]235
[6026]236    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
237    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
[5906]238
[6026]239    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
240    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
[5906]241
242
[5864]243    // Produce unswizzled bit streams
[6039]244    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
[6026]245    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[6039]246    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
[5864]247
248
[6026]249    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
[6039]250    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
[5864]251
252    // --------------------------------------------------------
253    // End
[6026]254    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]255    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]256    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5864]257
[6026]258    mPxDriver.generatePipelineIR();
259    mPxDriver.deallocateBuffers();
[5864]260
261    iBuilder->CreateRetVoid();
262
[6026]263    mPxDriver.finalizeObject();
[5864]264}
265
266void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
267    Module * M = iBuilder->getModule();
268    Type * const sizeTy = iBuilder->getSizeTy();
269    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
270    Type * const voidTy = iBuilder->getVoidTy();
271    Type * const inputType = iBuilder->getInt8PtrTy();
272
273    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
274    main->setCallingConv(CallingConv::C);
275    Function::arg_iterator args = main->arg_begin();
[6026]276    mInputStream = &*(args++);
277    mInputStream->setName("input");
[5864]278
[6026]279    mHeaderSize = &*(args++);
280    mHeaderSize->setName("mHeaderSize");
[5864]281
[6026]282    mFileSize = &*(args++);
283    mFileSize->setName("mFileSize");
[5864]284
[6026]285    mHasBlockChecksum = &*(args++);
286    mHasBlockChecksum->setName("mHasBlockChecksum");
[6020]287    // TODO for now, we do not handle blockCheckSum
[6026]288    mHasBlockChecksum = iBuilder->getInt1(false);
[5864]289
290    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
291}
292
293void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6026]294    mCompressedByteStream = mPxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
295    mCompressedBasisBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
[5864]296
[6026]297    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
298    sourceK->setInitialArguments({mInputStream, mFileSize});
299    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
300    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
[5985]301//    s2pk->addAttribute(MustConsumeAll());
[6026]302    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
[5864]303}
304
305void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
306    //// Decode Block Information
[6026]307    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
308    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
309    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
[5864]310
311    //// Generate Helper Markers Extenders, FX, XF
[6026]312    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
313    mMatchOffsetMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
[5974]314        // FX and XF streams will be added to IndexBuilderKernel in the future
[6026]315//    StreamSetBuffer * const CC_0xFX = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
316//    StreamSetBuffer * const CC_0xXF = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
[5864]317
[6026]318    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
[5985]319//    extenderK->addAttribute(MustConsumeAll());
[6026]320    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
[5864]321
322
[6026]323    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
324    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
325    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
[5864]326
[5974]327//    re::CC* xfCC = re::makeCC(0x0f);
328//    re::CC* fxCC = re::makeCC(0xf0);
329//    for (re::codepoint_t i = 1; i <= 0xf; i++) {
330//        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
331//        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
332//    }
333
[6026]334//    Kernel * CC_0xFXKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
335//    mPxDriver.makeKernelCall(CC_0xFXKernel, {mCompressedBasisBits}, {CC_0xFX});
[5974]336
[6026]337//    Kernel * CC_0xXFKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
338//    mPxDriver.makeKernelCall(CC_0xXFKernel, {mCompressedBasisBits}, {CC_0xXF});
[5974]339
[5864]340    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
341
342    //TODO handle uncompressed part
[6026]343    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
344    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
345    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
[5864]346
[6026]347    mDeletionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
348    mM0Marker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
349    mDepositMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
[5864]350
[6026]351    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
352    Lz4IndexBuilderK->setInitialArguments({mFileSize});
353    mPxDriver.makeKernelCall(
[5948]354            Lz4IndexBuilderK,
[5864]355            {
[6026]356                    mCompressedByteStream,
[5864]357                    Extenders,
[5974]358//                    CC_0xFX,
359//                    CC_0xXF,
[5864]360
361                    // Block Data
362                    BlockData_IsCompressed,
363                    BlockData_BlockStart,
364                    BlockData_BlockEnd
365            }, {
366                    //Uncompressed Data
367                    UncompressedStartPos,
368                    UncompressedLength,
369                    UncompressedOutputPos,
370
[6026]371                    mDeletionMarker,
372                    mM0Marker,
373                    mMatchOffsetMarker
[5864]374            });
375
[6026]376    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
377    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
[5864]378
379}
380
381std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6026]382    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
383    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
[5864]384
[6026]385    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
386    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
[5864]387    return std::make_pair(u16Swizzle0, u16Swizzle1);
388}
389
[6043]390void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
391    if (!mCompressionMarker) {
392        mCompressionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
393        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
394        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
395    }
396}
397
[6039]398parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6043]399    this->generateCompressionMarker(iBuilder);
[6039]400
401    // Deletion
402    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
[6043]403    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks());
[6039]404
405    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
[6043]406    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
[6039]407
408    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
409    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
410    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
411
412    return compressedBits;
413}
414
[5948]415int LZ4Generator::get4MbBufferBlocks() {
416    return 4 * 1024 * 1024 / codegen::BlockSize;
417}
418
[5864]419int LZ4Generator::getInputBufferBlocks() {
[5948]420    return this->get4MbBufferBlocks() * 2;
[5864]421}
422int LZ4Generator::getDecompressedBufferBlocks() {
[5948]423    return this->get4MbBufferBlocks() * 2;
[5864]424}
425
426
427
[5921]428
[5864]429// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.