Ignore:
Timestamp:
May 10, 2018, 2:28:16 PM (16 months ago)
Author:
xwa163
Message:
  1. Implement SwizzledMultiplePDEPkernel with the same logic as the new PDEPkernel, remove LZ4MultiplePDEPkernel, and improve performance
  2. Remove some unnecessary includes
  3. Add a prefix to some kernels
  4. Remove a legacy kernel
File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6020 r6026  
    55#include <boost/iostreams/device/mapped_file.hpp>
    66
    7 #include <llvm/Support/CommandLine.h>
    8 #include <llvm/Support/PrettyStackTrace.h>
    97
    108#include <cc/cc_compiler.h>
    119
    12 #include <lz4FrameDecoder.h>
    13 #include <kernels/streamset.h>
    1410#include <kernels/cc_kernel.h>
    1511#include <kernels/s2p_kernel.h>
     
    2218#include <kernels/swizzle.h>
    2319#include <kernels/pdep_kernel.h>
    24 #include <kernels/lz4/lz4_multiple_pdep_kernel.h>
    25 #include <kernels/lz4/lz4_match_copy_kernel.h>
     20#include <kernels/swizzled_multiple_pdep_kernel.h>
    2621#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
    2722#include <kernels/lz4/lz4_block_decoder.h>
     
    3429using namespace kernel;
    3530
    36 LZ4Generator::LZ4Generator():pxDriver("lz4d") {
     31LZ4Generator::LZ4Generator():mPxDriver("lz4d") {
    3732
    3833}
    3934
    4035MainFunctionType LZ4Generator::getMainFunc() {
    41     return reinterpret_cast<MainFunctionType>(pxDriver.getMain());
     36    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
    4237}
    4338
     
    4540
    4641void LZ4Generator::generateExtractOnlyPipeline(const std::string& outputFile) {
    47     auto & iBuilder = pxDriver.getBuilder();
     42    auto & iBuilder = mPxDriver.getBuilder();
    4843    this->generateMainFunc(iBuilder);
    4944
    50     StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     45    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    5146
    5247    // GeneratePipeline
     
    6156
    6257    // Produce unswizzled bit streams
    63     StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    64     Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    65 
    66     pxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
    67 
    68 
    69     Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
    70     pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
     58    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     59    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     60
     61    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
     62
     63
     64    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     65    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
    7166
    7267    // --------------------------------------------------------
    7368    // End
    74     Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     69    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    7570
    7671    outK->setInitialArguments({iBuilder->GetString(outputFile)});
    77     pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    78 
    79     pxDriver.generatePipelineIR();
    80     pxDriver.deallocateBuffers();
     72    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     73
     74    mPxDriver.generatePipelineIR();
     75    mPxDriver.deallocateBuffers();
    8176
    8277    iBuilder->CreateRetVoid();
    8378
    84     pxDriver.finalizeObject();
     79    mPxDriver.finalizeObject();
    8580}
    8681
    8782void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
    88     auto & iBuilder = pxDriver.getBuilder();
     83    auto & iBuilder = mPxDriver.getBuilder();
    8984    this->generateMainFunc(iBuilder);
    9085
    91     StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     86    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    9287
    9388    // GeneratePipeline
     
    9792    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    9893
    99     StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    100     StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    101 
    102     Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
    103     pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
     94    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     95    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     96
     97    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     98    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    10499
    105100    // Produce unswizzled bit streams
    106     StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    107     Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    108     pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
    109 
    110     Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
    111     pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
     101    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     102    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     103    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
     104
     105    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     106    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
    112107
    113108    // --------------------------------------------------------
    114109    // End
    115     Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     110    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    116111    outK->setInitialArguments({iBuilder->GetString(outputFile)});
    117     pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    118 
    119     pxDriver.generatePipelineIR();
    120     pxDriver.deallocateBuffers();
     112    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     113
     114    mPxDriver.generatePipelineIR();
     115    mPxDriver.deallocateBuffers();
    121116
    122117    iBuilder->CreateRetVoid();
    123118
    124     pxDriver.finalizeObject();
     119    mPxDriver.finalizeObject();
    125120}
    126121
    127122void LZ4Generator::generatePipeline(const std::string& outputFile) {
    128     auto & iBuilder = pxDriver.getBuilder();
     123    auto & iBuilder = mPxDriver.getBuilder();
    129124    this->generateMainFunc(iBuilder);
    130125
    131     StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     126    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    132127
    133128    // GeneratePipeline
     
    137132    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    138133
    139     StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    140     StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    141 
    142     Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
    143     pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    144 
    145 
    146     StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    147     StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    148 
    149     Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    150     pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
     134    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     135    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     136
     137    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     138    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
     139
     140    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     141    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     142
     143    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     144    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    151145
    152146
    153147    // Produce unswizzled bit streams
    154     StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    155     Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    156     pxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
    157 
    158 
    159     Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
    160     pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
     148    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     149    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     150    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
     151
     152
     153    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     154    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
    161155
    162156    // --------------------------------------------------------
    163157    // End
    164     Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     158    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    165159    outK->setInitialArguments({iBuilder->GetString(outputFile)});
    166     pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    167 
    168     pxDriver.generatePipelineIR();
    169     pxDriver.deallocateBuffers();
     160    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     161
     162    mPxDriver.generatePipelineIR();
     163    mPxDriver.deallocateBuffers();
    170164
    171165    iBuilder->CreateRetVoid();
    172166
    173     pxDriver.finalizeObject();
     167    mPxDriver.finalizeObject();
    174168}
    175169
     
    184178    main->setCallingConv(CallingConv::C);
    185179    Function::arg_iterator args = main->arg_begin();
    186     inputStream = &*(args++);
    187     inputStream->setName("input");
    188 
    189     headerSize = &*(args++);
    190     headerSize->setName("headerSize");
    191 
    192     fileSize = &*(args++);
    193     fileSize->setName("fileSize");
    194 
    195     hasBlockChecksum = &*(args++);
    196     hasBlockChecksum->setName("hasBlockChecksum");
     180    mInputStream = &*(args++);
     181    mInputStream->setName("input");
     182
     183    mHeaderSize = &*(args++);
     184    mHeaderSize->setName("mHeaderSize");
     185
     186    mFileSize = &*(args++);
     187    mFileSize->setName("mFileSize");
     188
     189    mHasBlockChecksum = &*(args++);
     190    mHasBlockChecksum->setName("mHasBlockChecksum");
    197191    // TODO for now, we do not handle blockCheckSum
    198     hasBlockChecksum = iBuilder->getInt1(false);
     192    mHasBlockChecksum = iBuilder->getInt1(false);
    199193
    200194    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
     
    202196
    203197void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    204     ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    205     BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
    206 
    207     kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
    208     sourceK->setInitialArguments({inputStream, fileSize});
    209     pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
    210     Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
     198    mCompressedByteStream = mPxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     199    mCompressedBasisBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
     200
     201    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
     202    sourceK->setInitialArguments({mInputStream, mFileSize});
     203    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
     204    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
    211205//    s2pk->addAttribute(MustConsumeAll());
    212     pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     206    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
    213207}
    214208
    215209void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    216210    //// Decode Block Information
    217     StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
    218     StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    219     StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     211    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
     212    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     213    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    220214
    221215    //// Generate Helper Markers Extenders, FX, XF
    222     StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
    223     MatchOffsetMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     216    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
     217    mMatchOffsetMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    224218        // FX and XF streams will be added to IndexBuilderKernel in the future
    225 //    StreamSetBuffer * const CC_0xFX = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    226 //    StreamSetBuffer * const CC_0xXF = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    227 
    228     Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
     219//    StreamSetBuffer * const CC_0xFX = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     220//    StreamSetBuffer * const CC_0xXF = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     221
     222    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    229223//    extenderK->addAttribute(MustConsumeAll());
    230     pxDriver.makeKernelCall(extenderK, {BasisBits}, {Extenders});
    231 
    232 
    233     Kernel * blockDecoderK = pxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
    234     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty()), headerSize, fileSize});
    235     pxDriver.makeKernelCall(blockDecoderK, {ByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     224    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     225
     226
     227    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     228    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
     229    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    236230
    237231//    re::CC* xfCC = re::makeCC(0x0f);
     
    242236//    }
    243237
    244 //    Kernel * CC_0xFXKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
    245 //    pxDriver.makeKernelCall(CC_0xFXKernel, {BasisBits}, {CC_0xFX});
    246 
    247 //    Kernel * CC_0xXFKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
    248 //    pxDriver.makeKernelCall(CC_0xXFKernel, {BasisBits}, {CC_0xXF});
     238//    Kernel * CC_0xFXKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
     239//    mPxDriver.makeKernelCall(CC_0xFXKernel, {mCompressedBasisBits}, {CC_0xFX});
     240
     241//    Kernel * CC_0xXFKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
     242//    mPxDriver.makeKernelCall(CC_0xXFKernel, {mCompressedBasisBits}, {CC_0xXF});
    249243
    250244    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
    251245
    252246    //TODO handle uncompressed part
    253     StreamSetBuffer * const UncompressedStartPos = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    254     StreamSetBuffer * const UncompressedLength = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    255     StreamSetBuffer * const UncompressedOutputPos = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    256 
    257     DeletionMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    258     M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    259     DepositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    260 
    261     Kernel* Lz4IndexBuilderK = pxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
    262     Lz4IndexBuilderK->setInitialArguments({fileSize});
    263     pxDriver.makeKernelCall(
     247    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     248    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     249    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     250
     251    mDeletionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     252    mM0Marker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     253    mDepositMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     254
     255    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
     256    Lz4IndexBuilderK->setInitialArguments({mFileSize});
     257    mPxDriver.makeKernelCall(
    264258            Lz4IndexBuilderK,
    265259            {
    266                     ByteStream,
     260                    mCompressedByteStream,
    267261                    Extenders,
    268262//                    CC_0xFX,
     
    279273                    UncompressedOutputPos,
    280274
    281                     DeletionMarker,
    282                     M0Marker,
    283                     MatchOffsetMarker
     275                    mDeletionMarker,
     276                    mM0Marker,
     277                    mMatchOffsetMarker
    284278            });
    285279
    286     Kernel * generateDepositK = pxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
    287     pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker});
     280    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
     281    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
    288282
    289283}
    290284
    291285std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    292     StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    293     StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    294 
    295     Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
    296     pxDriver.makeKernelCall(delK, {DeletionMarker, BasisBits}, {u16Swizzle0, u16Swizzle1});
     286    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     287    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     288
     289    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
     290    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
    297291    return std::make_pair(u16Swizzle0, u16Swizzle1);
    298292}
Note: See TracChangeset for help on using the changeset viewer.