Ignore:
Timestamp:
Jul 31, 2018, 2:31:21 PM (7 months ago)
Author:
xwa163
Message:
  1. Clean up legacy slow LZ4-related kernels
  2. Rename lz4d_ext_dep to lz4_decompression
  3. Rename LZ4 AIO-related kernels to LZ4 Decompression kernels
File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6135 r6136  
    1313#include <kernels/source_kernel.h>
    1414#include <kernels/stdout_kernel.h>
    15 #include <kernels/lz4/lz4_generate_deposit_stream.h>
    1615#include <kernels/kernel_builder.h>
    1716#include <kernels/deletion.h>
     
    1918#include <kernels/pdep_kernel.h>
    2019#include <kernels/swizzled_multiple_pdep_kernel.h>
    21 #include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
    22 #include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
    2320#include <kernels/lz4/lz4_block_decoder.h>
    24 #include <kernels/lz4/lz4_index_builder.h>
    25 #include <kernels/lz4/aio/lz4_bytestream_aio.h>
    26 #include <kernels/lz4/aio/lz4_parallel_bytestream_aio.h>
    27 #include <kernels/lz4/aio/lz4_swizzled_aio.h>
    28 #include <kernels/lz4/aio/lz4_bitstream_aio.h>
    29 #include <kernels/lz4/aio/lz4_twist_aio.h>
     21#include <kernels/lz4/decompression/lz4_bytestream_decompression.h>
     22#include <kernels/lz4/decompression/lz4_parallel_bytestream_decompression.h>
     23#include <kernels/lz4/decompression/lz4_swizzled_decompression.h>
     24#include <kernels/lz4/decompression/lz4_bitstream_decompression.h>
     25#include <kernels/lz4/decompression/lz4_twist_decompression.h>
    3026#include <kernels/bitstream_pdep_kernel.h>
    31 #include <kernels/lz4/lz4_bitstream_not_kernel.h>
    32 #include <kernels/lz4/aio/twist_kernel.h>
    33 #include <kernels/lz4/aio/untwist_kernel.h>
     27#include <kernels/lz4/twist_kernel.h>
     28#include <kernels/lz4/untwist_kernel.h>
    3429
    3530namespace re { class CC; }
     
    4035
    4136LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) {
    42     mCompressionMarker = NULL;
    4337}
    4438
     
    4741}
    4842
    49 void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
    50     auto & iBuilder = mPxDriver.getBuilder();
    51     this->generateMainFunc(iBuilder);
    52 
    53     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    54 
    55     // GeneratePipeline
    56     this->generateLoadByteStreamAndBitStream(iBuilder);
    57 
    58     //// Decode Block Information
    59     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
    60     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    61     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    62 
    63     //// Generate Helper Markers Extenders, FX, XF
    64     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
    65     mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    66     Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    67     mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    68 
    69 
    70     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    71     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    72     mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    73 
    74     //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
    75     //TODO handle uncompressed part
    76     StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    77     StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    78     StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    79 
    80     mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    81     mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
    82     mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
    83 
    84     Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
    85     Lz4IndexBuilderK->setInitialArguments({mFileSize});
    86     mPxDriver.makeKernelCall(
    87             Lz4IndexBuilderK,
    88             {
    89                     mCompressedByteStream,
    90                     Extenders,
    91 
    92                     // Block Data
    93                     BlockData_IsCompressed,
    94                     BlockData_BlockStart,
    95                     BlockData_BlockEnd
    96             }, {
    97                     //Uncompressed Data
    98                     UncompressedStartPos,
    99                     UncompressedLength,
    100                     UncompressedOutputPos,
    101 
    102                     mCompressionMarker,
    103                     mM0Marker,
    104                     mMatchOffsetMarker
    105             });
    106 
    107     Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
    108     mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
    109 
    110 
    111     // Deletion
    112     StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    113     StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
    114 
    115     Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
    116     mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
    117 
    118     StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    119     Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
    120     mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
    121 
    122 
    123     StreamSetBuffer * const extractedBits = compressedBits;
    124 
    125     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    126     mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
    127 
    128     // --------------------------------------------------------
    129     // End
    130     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    131 
    132     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    133     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     43void LZ4Generator::generateDecompressionPipeline(const std::string &outputFile) {
     44    auto & b = mPxDriver.getBuilder();
     45
     46    this->generateMainFunc(b);
     47    this->generateLoadByteStreamAndBitStream(b);
     48    parabix::StreamSetBuffer* uncompressedByteStream = this->generateAIODecompression(b);
     49
     50    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(b, 8);
     51    outK->setInitialArguments({b->GetString(outputFile)});
     52    mPxDriver.makeKernelCall(outK, {uncompressedByteStream}, {});
    13453
    13554    mPxDriver.generatePipelineIR();
    13655    mPxDriver.deallocateBuffers();
    13756
    138     iBuilder->CreateRetVoid();
     57    b->CreateRetVoid();
    13958
    14059    mPxDriver.finalizeObject();
    14160}
    14261
    143 void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
    144     auto & iBuilder = mPxDriver.getBuilder();
    145     this->generateMainFunc(iBuilder);
    146 
    147     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    148 
    149     // GeneratePipeline
    150     this->generateLoadByteStreamAndBitStream(iBuilder);
    151     this->generateExtractAndDepositMarkers(iBuilder);
    152     StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    153 
    154     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    155     mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
    156 
    157     // --------------------------------------------------------
    158     // End
    159     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    160 
    161     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    162     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    163 
    164     mPxDriver.generatePipelineIR();
    165     mPxDriver.deallocateBuffers();
    166 
    167     iBuilder->CreateRetVoid();
    168 
    169     mPxDriver.finalizeObject();
    170 }
    171 
    172 void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
    173     auto & iBuilder = mPxDriver.getBuilder();
    174     this->generateMainFunc(iBuilder);
    175 
    176     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    177 
    178     // GeneratePipeline
    179     this->generateLoadByteStreamAndBitStream(iBuilder);
    180     this->generateExtractAndDepositMarkers(iBuilder);
    181     auto swizzle = this->generateSwizzleExtractData(iBuilder);
    182 
    183 
    184     // Produce unswizzled bit streams
    185     StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    186     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    187 
    188     mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
    189 
    190 
    191     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    192     mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
    193 
    194     // --------------------------------------------------------
    195     // End
    196     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    197 
    198     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    199     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    200 
    201     mPxDriver.generatePipelineIR();
    202     mPxDriver.deallocateBuffers();
    203 
    204     iBuilder->CreateRetVoid();
    205 
    206     mPxDriver.finalizeObject();
    207 }
    208 
    209 void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
    210     auto & iBuilder = mPxDriver.getBuilder();
    211     this->generateMainFunc(iBuilder);
    212 
    213     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    214 
    215     // GeneratePipeline
    216     this->generateLoadByteStreamAndBitStream(iBuilder);
    217     this->generateExtractAndDepositMarkers(iBuilder);
    218     StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    219 
    220     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
    221     Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    222     mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    223 
    224     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    225     mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
    226 
    227     // --------------------------------------------------------
    228     // End
    229     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    230 
    231     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    232     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    233 
    234     mPxDriver.generatePipelineIR();
    235     mPxDriver.deallocateBuffers();
    236 
    237     iBuilder->CreateRetVoid();
    238 
    239     mPxDriver.finalizeObject();
    240 }
    241 
    242 void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
    243     auto & iBuilder = mPxDriver.getBuilder();
    244     this->generateMainFunc(iBuilder);
    245 
    246     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    247 
    248     // GeneratePipeline
    249     this->generateLoadByteStreamAndBitStream(iBuilder);
    250     this->generateExtractAndDepositMarkers(iBuilder);
    251 
    252     auto swizzle = this->generateSwizzleExtractData(iBuilder);
    253 
    254     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    255     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    256 
    257     Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
    258     mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    259 
    260     // Produce unswizzled bit streams
    261     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    262     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    263     mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
    264 
    265     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    266     mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
    267 
    268     // --------------------------------------------------------
    269     // End
    270     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    271     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    272     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    273 
    274     mPxDriver.generatePipelineIR();
    275     mPxDriver.deallocateBuffers();
    276 
    277     iBuilder->CreateRetVoid();
    278 
    279     mPxDriver.finalizeObject();
    280 }
    281 
    282 void LZ4Generator::generatePipeline(const std::string &outputFile) {
    283     auto & iBuilder = mPxDriver.getBuilder();
    284     this->generateMainFunc(iBuilder);
    285 
    286     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    287 
    288     // GeneratePipeline
    289     this->generateLoadByteStreamAndBitStream(iBuilder);
    290     this->generateExtractAndDepositMarkers(iBuilder);
    291     StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    292 
    293     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
    294     Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    295     mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    296 
    297     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    298     Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
    299     mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
    300 
    301     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    302     mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
    303 
    304     // --------------------------------------------------------
    305     // End
    306     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    307 
    308     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    309     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    310 
    311     mPxDriver.generatePipelineIR();
    312     mPxDriver.deallocateBuffers();
    313 
    314     iBuilder->CreateRetVoid();
    315 
    316     mPxDriver.finalizeObject();
    317 }
    318 
    319 void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
    320     auto & iBuilder = mPxDriver.getBuilder();
    321     this->generateMainFunc(iBuilder);
    322 
    323     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    324 
    325     // GeneratePipeline
    326     this->generateLoadByteStreamAndBitStream(iBuilder);
    327     this->generateExtractAndDepositMarkers(iBuilder);
    328 
    329     auto swizzle = this->generateSwizzleExtractData(iBuilder);
    330 
    331     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
    332     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
    333 
    334     Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
    335     mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    336 
    337     StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
    338     StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
    339 
    340     Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    341     mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    342 
    343 
    344     // Produce unswizzled bit streams
    345     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    346     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    347     mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
    348 
    349 
    350     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    351     mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
    352 
    353     // --------------------------------------------------------
    354     // End
    355     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    356     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    357     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    358 
    359     mPxDriver.generatePipelineIR();
    360     mPxDriver.deallocateBuffers();
    361 
    362     iBuilder->CreateRetVoid();
    363 
    364     mPxDriver.finalizeObject();
    365 }
    36662
    36763void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     
    418114    auto & iBuilder = mGrepDriver->getBuilder();
    419115
    420     //// Decode Block Information
    421     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
    422     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    423     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    424 
    425 
    426     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    427     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    428     mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    429 
    430 
     116    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
    431117
    432118    size_t numOfStreams = compressedBitStreams[0]->getNumOfStreams();
     
    440126
    441127        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 2), this->getInputBufferBlocks(iBuilder));
    442         Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistAioKernel>(iBuilder, 2);
     128        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(iBuilder, 2);
    443129        lz4I4AioK->setInitialArguments({mFileSize});
    444130        mGrepDriver->makeKernelCall(lz4I4AioK, {
    445131                mCompressedByteStream,
    446132
    447                 // Block Data
    448                 BlockData_IsCompressed,
    449                 BlockData_BlockStart,
    450                 BlockData_BlockEnd,
     133                blockInfo.isCompress,
     134                blockInfo.blockStart,
     135                blockInfo.blockEnd,
    451136
    452137                twistedCharClasses
     
    469154        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
    470155
    471         Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistAioKernel>(iBuilder, 4);
     156        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(iBuilder, 4);
    472157        lz4I4AioK->setInitialArguments({mFileSize});
    473158        mGrepDriver->makeKernelCall(lz4I4AioK, {
    474159                mCompressedByteStream,
    475160
    476                 // Block Data
    477                 BlockData_IsCompressed,
    478                 BlockData_BlockStart,
    479                 BlockData_BlockEnd,
     161                blockInfo.isCompress,
     162                blockInfo.blockStart,
     163                blockInfo.blockEnd,
    480164
    481165                twistedCharClasses
     
    490174    }
    491175
    492 
    493 
    494 
    495176    std::vector<StreamSetBuffer *> inputStreams = {
    496177            mCompressedByteStream,
    497178
    498             // Block Data
    499             BlockData_IsCompressed,
    500             BlockData_BlockStart,
    501             BlockData_BlockEnd
     179            blockInfo.isCompress,
     180            blockInfo.blockStart,
     181            blockInfo.blockEnd,
    502182    };
    503183
     
    512192    }
    513193
    514     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamAioKernel>(iBuilder, numbersOfStreams);
     194    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamDecompressionKernel>(iBuilder, numbersOfStreams);
    515195    lz4AioK->setInitialArguments({mFileSize});
    516196    mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStream);
     
    521201
    522202StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    523     //// Decode Block Information
    524     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
    525     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    526     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    527 
    528     //// Generate Helper Markers Extenders, FX, XF
    529 //    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
    530 //    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    531 //    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    532 //    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    533 
    534 
    535     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    536     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    537     mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    538 
     203    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
    539204
    540205    // Produce unswizzled bit streams
     
    550215
    551216
    552     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
     217    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledDecompressionKernel>(iBuilder, 4, 2, 4);
    553218    lz4AioK->setInitialArguments({mFileSize});
    554219    mPxDriver.makeKernelCall(
     
    557222                    mCompressedByteStream,
    558223
    559 //                    Extenders,
    560 
    561                     // Block Data
    562                     BlockData_IsCompressed,
    563                     BlockData_BlockStart,
    564                     BlockData_BlockEnd,
     224                    blockInfo.isCompress,
     225                    blockInfo.blockStart,
     226                    blockInfo.blockEnd,
    565227
    566228                    u16Swizzle0,
     
    581243
    582244parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) {
    583     //// Decode Block Information
    584     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
    585     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    586     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    587 
    588     //// Generate Helper Markers Extenders
    589 //    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
    590 //    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    591 //    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    592 //    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    593 
    594     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    595     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    596     mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    597 
    598 
     245    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
    599246    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
    600247
    601     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
     248    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamDecompressionKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
    602249    lz4AioK->setInitialArguments({mFileSize});
    603250    mPxDriver.makeKernelCall(
     
    606253                    mCompressedByteStream,
    607254
    608 //                    Extenders,
    609 
    610                     // Block Data
    611                     BlockData_IsCompressed,
    612                     BlockData_BlockStart,
    613                     BlockData_BlockEnd
     255                    blockInfo.isCompress,
     256                    blockInfo.blockStart,
     257                    blockInfo.blockEnd
    614258            }, {
    615259                    decompressionByteStream
     
    624268
    625269    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
    626     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
     270    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(iBuilder);
    627271    lz4AioK->setInitialArguments({mFileSize});
    628272    mPxDriver.makeKernelCall(
     
    642286}
    643287
    644 void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    645     //// Decode Block Information
    646     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
    647     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    648     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    649 
    650     //// Generate Helper Markers Extenders, FX, XF
    651     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
    652     mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    653     Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    654     mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    655 
    656 
    657     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    658     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    659     mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    660 
    661     //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
    662 
    663     //TODO handle uncompressed part
    664     StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    665     StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    666     StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    667 
    668     mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    669     mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
    670     mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
    671 
    672     Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
    673     Lz4IndexBuilderK->setInitialArguments({mFileSize});
    674     mPxDriver.makeKernelCall(
    675             Lz4IndexBuilderK,
    676             {
    677                     mCompressedByteStream,
    678                     Extenders,
    679 
    680                     // Block Data
    681                     BlockData_IsCompressed,
    682                     BlockData_BlockStart,
    683                     BlockData_BlockEnd
    684             }, {
    685                     //Uncompressed Data
    686                     UncompressedStartPos,
    687                     UncompressedLength,
    688                     UncompressedOutputPos,
    689 
    690                     mDeletionMarker,
    691                     mM0Marker,
    692                     mMatchOffsetMarker
    693             });
    694 
    695     Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
    696     mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
    697 
    698 }
    699 
    700 std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    701     StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    702     StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    703 
    704     Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
    705     mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
    706     return std::make_pair(u16Swizzle0, u16Swizzle1);
    707 }
    708 
    709 void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    710     if (!mCompressionMarker) {
    711         mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    712         Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
    713         mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
    714     }
    715 }
    716 
    717 parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    718     this->generateCompressionMarker(iBuilder);
    719 
    720     // Deletion
    721     StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    722     StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
    723 
    724     Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
    725     mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
    726 
    727     StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    728     Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
    729     mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
    730 
    731     return compressedBits;
    732 }
    733 
    734288int LZ4Generator::get4MbBufferBlocks() {
    735289    return mLz4BlockSize / codegen::BlockSize;
     
    737291
    738292int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
    739     return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
     293    return this->get4MbBufferBlocks() * 2;
    740294}
    741295int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
    742     return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
     296    return this->get4MbBufferBlocks() * 2;
    743297}
    744298
Note: See TracChangeset for help on using the changeset viewer.