Changeset 6136


Ignore:
Timestamp:
Jul 31, 2018, 2:31:21 PM (2 weeks ago)
Author:
xwa163
Message:
  1. Clean up legacy slow LZ4-related kernels
  2. Rename lz4d_ext_dep to lz4_decoder
  3. Rename LZ4 AIO-related kernels to LZ4 Decompression kernels
Location:
icGREP/icgrep-devel/icgrep
Files:
18 added
14 deleted
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6135 r6136  
    103103add_library(UCDlib UCD/CaseFolding.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
    104104add_library(GrepEngine  ${GREP_CORE_SRC} grep/grep_engine.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/streams_merge.cpp kernels/until_n.cpp kernels/UCD_property_kernel.cpp kernels/grapheme_kernel.cpp)
    105 add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/lz4/lz4_bitstream_not_kernel.cpp kernels/lz4/lz4_bitstream_match_copy_kernel.cpp kernels/fake_stream_generating_kernel.cpp kernels/lz4/aio/lz4_bytestream_aio.cpp kernels/lz4/aio/lz4_swizzled_aio.cpp kernels/lz4/aio/lz4_parallel_bytestream_aio.cpp kernels/lz4/aio/lz4_sequential_aio_base.cpp kernels/lz4/aio/lz4_bitstream_aio.cpp kernels/lz4/aio/lz4_twist_aio.cpp kernels/lz4/aio/twist_kernel.cpp kernels/lz4/aio/untwist_kernel.cpp)
     105add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/pdep_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/fake_stream_generating_kernel.cpp kernels/lz4/decompression/lz4_bytestream_decompression.cpp kernels/lz4/decompression/lz4_swizzled_decompression.cpp kernels/lz4/decompression/lz4_parallel_bytestream_decompression.cpp kernels/lz4/decompression/lz4_sequential_decompression_base.cpp kernels/lz4/decompression/lz4_bitstream_decompression.cpp kernels/lz4/decompression/lz4_twist_decompression.cpp kernels/lz4/twist_kernel.cpp kernels/lz4/untwist_kernel.cpp)
    106106add_library(LZParabix_Lib lzparabix/LZParabixGenerator.cpp kernels/lzparabix/decoder/LZParabixBlockDecoder.cpp kernels/lzparabix/decoder/LZParabixAioBaseKernel.cpp lzparabix/LZParabixGrepGenerator.cpp kernels/fake_stream_generating_kernel.cpp kernels/lzparabix/encoder/LZParabixCompressionKernel.cpp kernels/lzparabix/decoder/LZParabixLiteralDecoderKernel.cpp kernels/lzparabix/decoder/LZParabixBitStreamAioKernel.cpp kernels/lzparabix/decoder/LZParabixSwizzledAioKernel.cpp)
    107107
     
    129129add_executable(character_deletion character_deletion.cpp kernels/cc_kernel.cpp)
    130130add_executable(character_deposit character_deposit.cpp kernels/cc_kernel.cpp kernels/pdep_kernel.cpp kernels/bitstream_pdep_kernel.cpp)
    131 add_executable(lz4d_ext_dep lz4d_ext_dep.cpp)
     131add_executable(lz4_decoder lz4_decoder.cpp)
    132132add_executable(lz4_grep grep_interface.cpp util/file_select.cpp lz4_grep.cpp lz4/LZ4GrepGenerator.cpp)
    133133add_executable(lzparabix_decoder lzparabix_decoder.cpp)
     
    150150target_link_libraries (character_deletion PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    151151target_link_libraries (character_deposit PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    152 target_link_libraries (lz4d_ext_dep LZ4_Lib PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
     152target_link_libraries (lz4_decoder LZ4_Lib PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    153153target_link_libraries (lz4_grep LZ4_Lib GrepEngine UCDlib PabloADT RegExpCompiler CodeGen CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
    154154target_link_libraries (lzparabix_decoder LZParabix_Lib PabloADT CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${Boost_LIBRARIES} ${CUDA_LIB})
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6135 r6136  
    1313#include <kernels/source_kernel.h>
    1414#include <kernels/stdout_kernel.h>
    15 #include <kernels/lz4/lz4_generate_deposit_stream.h>
    1615#include <kernels/kernel_builder.h>
    1716#include <kernels/deletion.h>
     
    1918#include <kernels/pdep_kernel.h>
    2019#include <kernels/swizzled_multiple_pdep_kernel.h>
    21 #include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
    22 #include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
    2320#include <kernels/lz4/lz4_block_decoder.h>
    24 #include <kernels/lz4/lz4_index_builder.h>
    25 #include <kernels/lz4/aio/lz4_bytestream_aio.h>
    26 #include <kernels/lz4/aio/lz4_parallel_bytestream_aio.h>
    27 #include <kernels/lz4/aio/lz4_swizzled_aio.h>
    28 #include <kernels/lz4/aio/lz4_bitstream_aio.h>
    29 #include <kernels/lz4/aio/lz4_twist_aio.h>
     21#include <kernels/lz4/decompression/lz4_bytestream_decompression.h>
     22#include <kernels/lz4/decompression/lz4_parallel_bytestream_decompression.h>
     23#include <kernels/lz4/decompression/lz4_swizzled_decompression.h>
     24#include <kernels/lz4/decompression/lz4_bitstream_decompression.h>
     25#include <kernels/lz4/decompression/lz4_twist_decompression.h>
    3026#include <kernels/bitstream_pdep_kernel.h>
    31 #include <kernels/lz4/lz4_bitstream_not_kernel.h>
    32 #include <kernels/lz4/aio/twist_kernel.h>
    33 #include <kernels/lz4/aio/untwist_kernel.h>
     27#include <kernels/lz4/twist_kernel.h>
     28#include <kernels/lz4/untwist_kernel.h>
    3429
    3530namespace re { class CC; }
     
    4035
    4136LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) {
    42     mCompressionMarker = NULL;
    4337}
    4438
     
    4741}
    4842
    49 void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
    50     auto & iBuilder = mPxDriver.getBuilder();
    51     this->generateMainFunc(iBuilder);
    52 
    53     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    54 
    55     // GeneratePipeline
    56     this->generateLoadByteStreamAndBitStream(iBuilder);
    57 
    58     //// Decode Block Information
    59     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
    60     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    61     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    62 
    63     //// Generate Helper Markers Extenders, FX, XF
    64     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
    65     mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    66     Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    67     mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    68 
    69 
    70     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    71     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    72     mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    73 
    74     //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
    75     //TODO handle uncompressed part
    76     StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    77     StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    78     StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    79 
    80     mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    81     mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
    82     mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
    83 
    84     Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
    85     Lz4IndexBuilderK->setInitialArguments({mFileSize});
    86     mPxDriver.makeKernelCall(
    87             Lz4IndexBuilderK,
    88             {
    89                     mCompressedByteStream,
    90                     Extenders,
    91 
    92                     // Block Data
    93                     BlockData_IsCompressed,
    94                     BlockData_BlockStart,
    95                     BlockData_BlockEnd
    96             }, {
    97                     //Uncompressed Data
    98                     UncompressedStartPos,
    99                     UncompressedLength,
    100                     UncompressedOutputPos,
    101 
    102                     mCompressionMarker,
    103                     mM0Marker,
    104                     mMatchOffsetMarker
    105             });
    106 
    107     Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
    108     mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
    109 
    110 
    111     // Deletion
    112     StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    113     StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
    114 
    115     Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
    116     mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
    117 
    118     StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    119     Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
    120     mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
    121 
    122 
    123     StreamSetBuffer * const extractedBits = compressedBits;
    124 
    125     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    126     mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
    127 
    128     // --------------------------------------------------------
    129     // End
    130     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    131 
    132     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    133     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     43void LZ4Generator::generateDecompressionPipeline(const std::string &outputFile) {
     44    auto & b = mPxDriver.getBuilder();
     45
     46    this->generateMainFunc(b);
     47    this->generateLoadByteStreamAndBitStream(b);
     48    parabix::StreamSetBuffer* uncompressedByteStream = this->generateAIODecompression(b);
     49
     50    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(b, 8);
     51    outK->setInitialArguments({b->GetString(outputFile)});
     52    mPxDriver.makeKernelCall(outK, {uncompressedByteStream}, {});
    13453
    13554    mPxDriver.generatePipelineIR();
    13655    mPxDriver.deallocateBuffers();
    13756
    138     iBuilder->CreateRetVoid();
     57    b->CreateRetVoid();
    13958
    14059    mPxDriver.finalizeObject();
    14160}
    14261
    143 void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
    144     auto & iBuilder = mPxDriver.getBuilder();
    145     this->generateMainFunc(iBuilder);
    146 
    147     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    148 
    149     // GeneratePipeline
    150     this->generateLoadByteStreamAndBitStream(iBuilder);
    151     this->generateExtractAndDepositMarkers(iBuilder);
    152     StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    153 
    154     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    155     mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
    156 
    157     // --------------------------------------------------------
    158     // End
    159     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    160 
    161     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    162     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    163 
    164     mPxDriver.generatePipelineIR();
    165     mPxDriver.deallocateBuffers();
    166 
    167     iBuilder->CreateRetVoid();
    168 
    169     mPxDriver.finalizeObject();
    170 }
    171 
    172 void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
    173     auto & iBuilder = mPxDriver.getBuilder();
    174     this->generateMainFunc(iBuilder);
    175 
    176     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    177 
    178     // GeneratePipeline
    179     this->generateLoadByteStreamAndBitStream(iBuilder);
    180     this->generateExtractAndDepositMarkers(iBuilder);
    181     auto swizzle = this->generateSwizzleExtractData(iBuilder);
    182 
    183 
    184     // Produce unswizzled bit streams
    185     StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    186     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    187 
    188     mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
    189 
    190 
    191     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    192     mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
    193 
    194     // --------------------------------------------------------
    195     // End
    196     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    197 
    198     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    199     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    200 
    201     mPxDriver.generatePipelineIR();
    202     mPxDriver.deallocateBuffers();
    203 
    204     iBuilder->CreateRetVoid();
    205 
    206     mPxDriver.finalizeObject();
    207 }
    208 
    209 void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
    210     auto & iBuilder = mPxDriver.getBuilder();
    211     this->generateMainFunc(iBuilder);
    212 
    213     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    214 
    215     // GeneratePipeline
    216     this->generateLoadByteStreamAndBitStream(iBuilder);
    217     this->generateExtractAndDepositMarkers(iBuilder);
    218     StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    219 
    220     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
    221     Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    222     mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    223 
    224     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    225     mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
    226 
    227     // --------------------------------------------------------
    228     // End
    229     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    230 
    231     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    232     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    233 
    234     mPxDriver.generatePipelineIR();
    235     mPxDriver.deallocateBuffers();
    236 
    237     iBuilder->CreateRetVoid();
    238 
    239     mPxDriver.finalizeObject();
    240 }
    241 
    242 void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
    243     auto & iBuilder = mPxDriver.getBuilder();
    244     this->generateMainFunc(iBuilder);
    245 
    246     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    247 
    248     // GeneratePipeline
    249     this->generateLoadByteStreamAndBitStream(iBuilder);
    250     this->generateExtractAndDepositMarkers(iBuilder);
    251 
    252     auto swizzle = this->generateSwizzleExtractData(iBuilder);
    253 
    254     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    255     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    256 
    257     Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
    258     mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    259 
    260     // Produce unswizzled bit streams
    261     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    262     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    263     mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
    264 
    265     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    266     mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
    267 
    268     // --------------------------------------------------------
    269     // End
    270     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    271     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    272     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    273 
    274     mPxDriver.generatePipelineIR();
    275     mPxDriver.deallocateBuffers();
    276 
    277     iBuilder->CreateRetVoid();
    278 
    279     mPxDriver.finalizeObject();
    280 }
    281 
    282 void LZ4Generator::generatePipeline(const std::string &outputFile) {
    283     auto & iBuilder = mPxDriver.getBuilder();
    284     this->generateMainFunc(iBuilder);
    285 
    286     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    287 
    288     // GeneratePipeline
    289     this->generateLoadByteStreamAndBitStream(iBuilder);
    290     this->generateExtractAndDepositMarkers(iBuilder);
    291     StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    292 
    293     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
    294     Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    295     mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    296 
    297     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    298     Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
    299     mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
    300 
    301     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    302     mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
    303 
    304     // --------------------------------------------------------
    305     // End
    306     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    307 
    308     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    309     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    310 
    311     mPxDriver.generatePipelineIR();
    312     mPxDriver.deallocateBuffers();
    313 
    314     iBuilder->CreateRetVoid();
    315 
    316     mPxDriver.finalizeObject();
    317 }
    318 
    319 void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
    320     auto & iBuilder = mPxDriver.getBuilder();
    321     this->generateMainFunc(iBuilder);
    322 
    323     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    324 
    325     // GeneratePipeline
    326     this->generateLoadByteStreamAndBitStream(iBuilder);
    327     this->generateExtractAndDepositMarkers(iBuilder);
    328 
    329     auto swizzle = this->generateSwizzleExtractData(iBuilder);
    330 
    331     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
    332     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
    333 
    334     Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
    335     mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    336 
    337     StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
    338     StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
    339 
    340     Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    341     mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    342 
    343 
    344     // Produce unswizzled bit streams
    345     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    346     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    347     mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
    348 
    349 
    350     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    351     mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
    352 
    353     // --------------------------------------------------------
    354     // End
    355     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    356     outK->setInitialArguments({iBuilder->GetString(outputFile)});
    357     mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    358 
    359     mPxDriver.generatePipelineIR();
    360     mPxDriver.deallocateBuffers();
    361 
    362     iBuilder->CreateRetVoid();
    363 
    364     mPxDriver.finalizeObject();
    365 }
    36662
    36763void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     
    418114    auto & iBuilder = mGrepDriver->getBuilder();
    419115
    420     //// Decode Block Information
    421     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
    422     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    423     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    424 
    425 
    426     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    427     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    428     mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    429 
    430 
     116    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
    431117
    432118    size_t numOfStreams = compressedBitStreams[0]->getNumOfStreams();
     
    440126
    441127        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 2), this->getInputBufferBlocks(iBuilder));
    442         Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistAioKernel>(iBuilder, 2);
     128        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(iBuilder, 2);
    443129        lz4I4AioK->setInitialArguments({mFileSize});
    444130        mGrepDriver->makeKernelCall(lz4I4AioK, {
    445131                mCompressedByteStream,
    446132
    447                 // Block Data
    448                 BlockData_IsCompressed,
    449                 BlockData_BlockStart,
    450                 BlockData_BlockEnd,
     133                blockInfo.isCompress,
     134                blockInfo.blockStart,
     135                blockInfo.blockEnd,
    451136
    452137                twistedCharClasses
     
    469154        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
    470155
    471         Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistAioKernel>(iBuilder, 4);
     156        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(iBuilder, 4);
    472157        lz4I4AioK->setInitialArguments({mFileSize});
    473158        mGrepDriver->makeKernelCall(lz4I4AioK, {
    474159                mCompressedByteStream,
    475160
    476                 // Block Data
    477                 BlockData_IsCompressed,
    478                 BlockData_BlockStart,
    479                 BlockData_BlockEnd,
     161                blockInfo.isCompress,
     162                blockInfo.blockStart,
     163                blockInfo.blockEnd,
    480164
    481165                twistedCharClasses
     
    490174    }
    491175
    492 
    493 
    494 
    495176    std::vector<StreamSetBuffer *> inputStreams = {
    496177            mCompressedByteStream,
    497178
    498             // Block Data
    499             BlockData_IsCompressed,
    500             BlockData_BlockStart,
    501             BlockData_BlockEnd
     179            blockInfo.isCompress,
     180            blockInfo.blockStart,
     181            blockInfo.blockEnd,
    502182    };
    503183
     
    512192    }
    513193
    514     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamAioKernel>(iBuilder, numbersOfStreams);
     194    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamDecompressionKernel>(iBuilder, numbersOfStreams);
    515195    lz4AioK->setInitialArguments({mFileSize});
    516196    mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStream);
     
    521201
    522202StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    523     //// Decode Block Information
    524     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
    525     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    526     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    527 
    528     //// Generate Helper Markers Extenders, FX, XF
    529 //    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
    530 //    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    531 //    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    532 //    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    533 
    534 
    535     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    536     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    537     mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    538 
     203    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
    539204
    540205    // Produce unswizzled bit streams
     
    550215
    551216
    552     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
     217    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledDecompressionKernel>(iBuilder, 4, 2, 4);
    553218    lz4AioK->setInitialArguments({mFileSize});
    554219    mPxDriver.makeKernelCall(
     
    557222                    mCompressedByteStream,
    558223
    559 //                    Extenders,
    560 
    561                     // Block Data
    562                     BlockData_IsCompressed,
    563                     BlockData_BlockStart,
    564                     BlockData_BlockEnd,
     224                    blockInfo.isCompress,
     225                    blockInfo.blockStart,
     226                    blockInfo.blockEnd,
    565227
    566228                    u16Swizzle0,
     
    581243
    582244parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) {
    583     //// Decode Block Information
    584     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
    585     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    586     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    587 
    588     //// Generate Helper Markers Extenders
    589 //    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
    590 //    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    591 //    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    592 //    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    593 
    594     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    595     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    596     mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    597 
    598 
     245    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
    599246    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
    600247
    601     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
     248    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamDecompressionKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
    602249    lz4AioK->setInitialArguments({mFileSize});
    603250    mPxDriver.makeKernelCall(
     
    606253                    mCompressedByteStream,
    607254
    608 //                    Extenders,
    609 
    610                     // Block Data
    611                     BlockData_IsCompressed,
    612                     BlockData_BlockStart,
    613                     BlockData_BlockEnd
     255                    blockInfo.isCompress,
     256                    blockInfo.blockStart,
     257                    blockInfo.blockEnd
    614258            }, {
    615259                    decompressionByteStream
     
    624268
    625269    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
    626     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
     270    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(iBuilder);
    627271    lz4AioK->setInitialArguments({mFileSize});
    628272    mPxDriver.makeKernelCall(
     
    642286}
    643287
    644 void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    645     //// Decode Block Information
    646     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
    647     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    648     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    649 
    650     //// Generate Helper Markers Extenders, FX, XF
    651     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
    652     mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    653     Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    654     mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    655 
    656 
    657     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    658     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    659     mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    660 
    661     //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
    662 
    663     //TODO handle uncompressed part
    664     StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    665     StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    666     StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    667 
    668     mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    669     mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
    670     mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
    671 
    672     Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
    673     Lz4IndexBuilderK->setInitialArguments({mFileSize});
    674     mPxDriver.makeKernelCall(
    675             Lz4IndexBuilderK,
    676             {
    677                     mCompressedByteStream,
    678                     Extenders,
    679 
    680                     // Block Data
    681                     BlockData_IsCompressed,
    682                     BlockData_BlockStart,
    683                     BlockData_BlockEnd
    684             }, {
    685                     //Uncompressed Data
    686                     UncompressedStartPos,
    687                     UncompressedLength,
    688                     UncompressedOutputPos,
    689 
    690                     mDeletionMarker,
    691                     mM0Marker,
    692                     mMatchOffsetMarker
    693             });
    694 
    695     Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
    696     mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
    697 
    698 }
    699 
    700 std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    701     StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    702     StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    703 
    704     Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
    705     mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
    706     return std::make_pair(u16Swizzle0, u16Swizzle1);
    707 }
    708 
    709 void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    710     if (!mCompressionMarker) {
    711         mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    712         Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
    713         mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
    714     }
    715 }
    716 
    717 parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    718     this->generateCompressionMarker(iBuilder);
    719 
    720     // Deletion
    721     StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    722     StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
    723 
    724     Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
    725     mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
    726 
    727     StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    728     Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
    729     mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
    730 
    731     return compressedBits;
    732 }
    733 
    734288int LZ4Generator::get4MbBufferBlocks() {
    735289    return mLz4BlockSize / codegen::BlockSize;
     
    737291
    738292int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
    739     return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
     293    return this->get4MbBufferBlocks() * 2;
    740294}
    741295int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
    742     return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
     296    return this->get4MbBufferBlocks() * 2;
    743297}
    744298
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.h

    r6132 r6136  
    2929public:
    3030    LZ4Generator();
    31 
    3231    MainFunctionType getMainFunc();
    3332
    34     void generatePipeline(const std::string &outputFile);
    35     void generateSwizzledPipeline(const std::string &outputFile);
    36     void generateNewExtractOnlyPipeline(const std::string &outputFile);
    37     void generateExtractOnlyPipeline(const std::string &outputFile);
    38     void generateSwizzledExtractOnlyPipeline(const std::string &outputFile);
    39     void generateExtractAndDepositOnlyPipeline(const std::string &outputFile);
    40     void generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile);
     33    void generateDecompressionPipeline(const std::string &outputFile);
     34protected:
    4135
    42 protected:
    4336    //// Protected Method
    4437    std::vector<parabix::StreamSetBuffer*> convertCompressedBitsStreamWithBitStreamAioApproach(
     
    4942    virtual void generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    5043    virtual void generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    51     virtual void generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    5244    virtual parabix::StreamSetBuffer * generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel);
    5345    virtual parabix::StreamSetBuffer * generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    5446    virtual parabix::StreamSetBuffer * generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    5547    virtual parabix::StreamSetBuffer * generateBitStreamAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    56 
    57     virtual std::pair<parabix::StreamSetBuffer*, parabix::StreamSetBuffer*> generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    58     virtual parabix::StreamSetBuffer* generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    59 
    60     void generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    6148
    6249    // BufferSize related Helper Function
     
    7663    llvm::Value * mHasBlockChecksum;
    7764
    78 
    7965    // StreamSetBuffers
    8066    parabix::StreamSetBuffer * mCompressedByteStream;
    8167    parabix::StreamSetBuffer * mCompressedBasisBits;
    82     parabix::StreamSetBuffer * mDeletionMarker;
    83     parabix::StreamSetBuffer * mCompressionMarker;
    84     parabix::StreamSetBuffer * mDepositMarker;
    85     parabix::StreamSetBuffer * mMatchOffsetMarker;
    86 
    87     // M0CountMarker will not contain anything, it will only be used to pass producedItemCount and manage processedItemCount between different kernel
    88     parabix::StreamSetBuffer * mM0Marker;
    8968
    9069    unsigned mLz4BlockSize;
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r6135 r6136  
    1313#include <kernels/source_kernel.h>
    1414#include <kernels/stdout_kernel.h>
    15 #include <kernels/lz4/lz4_generate_deposit_stream.h>
    1615#include <kernels/kernel_builder.h>
    1716#include <kernels/deletion.h>
     
    1918#include <kernels/pdep_kernel.h>
    2019#include <kernels/swizzled_multiple_pdep_kernel.h>
    21 #include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
    22 #include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
    23 #include <kernels/lz4/lz4_bitstream_not_kernel.h>
    2420#include <kernels/fake_stream_generating_kernel.h>
    2521#include <kernels/bitstream_pdep_kernel.h>
     
    4844#include <kernels/scanmatchgen.h>
    4945#include <kernels/until_n.h>
    50 #include <re/casing.h>
    51 #include <re/exclude_CC.h>
    52 #include <re/to_utf8.h>
    53 #include <re/re_analysis.h>
    54 #include <re/re_name_resolve.h>
    55 #include <re/re_name_gather.h>
    56 #include <re/re_multiplex.h>
    57 #include <re/re_utility.h>
    5846#include <re/grapheme_clusters.h>
    5947#include <re/printer_re.h>
     
    6149#include <llvm/Support/Debug.h>
    6250#include <kernels/lz4/lz4_block_decoder.h>
    63 #include <kernels/lz4/aio/lz4_swizzled_aio.h>
    64 #include <kernels/lz4/aio/lz4_bitstream_aio.h>
     51#include <kernels/lz4/decompression/lz4_swizzled_decompression.h>
     52#include <kernels/lz4/decompression/lz4_bitstream_decompression.h>
    6553#include <re/re_seq.h>
    66 #include <kernels/lz4/aio/lz4_bytestream_aio.h>
     54#include <kernels/lz4/decompression/lz4_bytestream_decompression.h>
    6755
    6856namespace re { class CC; }
     
    130118
    131119    StreamSetBuffer * const decompressionMtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(b), 1);
    132     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(b, true);
     120    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(b, true);
    133121    lz4AioK->setInitialArguments({mFileSize});
    134122    mPxDriver.makeKernelCall(
     
    168156
    169157
    170     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(b, 4, 1, 4);
     158    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledDecompressionKernel>(b, 4, 1, 4);
    171159    lz4AioK->setInitialArguments({mFileSize});
    172160    mPxDriver.makeKernelCall(
     
    196184}
    197185
    198 StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
    199     auto mGrepDriver = &mPxDriver;
    200     auto & idb = mGrepDriver->getBuilder();
    201 
    202     StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
    203     Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
    204     mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
    205 
    206     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
    207     Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
    208     mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
    209 
    210     StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(idb), 1);
    211     Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
    212     mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
    213 
    214     // Produce unswizzled bit streams
    215     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
    216     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
    217     mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
    218 
    219     return matchCopiedBits;
    220 }
    221 parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
    222     if (numberOfStream == 4) {
    223         return this->convertCompressedBitsStreamWithSwizzledApproach(compressedBitStream, numberOfStream, prefix);
    224     }
    225 
    226     auto mGrepDriver = &mPxDriver;
    227     auto & idb = mGrepDriver->getBuilder();
    228 
    229     // Extract (Deletion)
    230     this->generateCompressionMarker(idb);
    231 
    232     StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
    233     StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks(idb));
    234 
    235     Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
    236     mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
    237 
    238     StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
    239     Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
    240     mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
    241 
    242     // Deposit
    243     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
    244     Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
    245     mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
    246 
    247     // Match Copy
    248     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
    249     Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
    250     mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
    251 
    252     return matchCopiedBits;
    253 }
    254 
    255 parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
    256     auto mGrepDriver = &mPxDriver;
    257     auto & idb = mGrepDriver->getBuilder();
    258     const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
    259 
    260     StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    261     kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
    262     mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
    263     return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
    264 }
    265 
    266 void LZ4GrepGenerator::generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs) {
     186
     187std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio, bool useSwizzled, bool useByteStream) {
     188
    267189    this->initREs(REs);
    268190    auto mGrepDriver = &mPxDriver;
     
    271193    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
    272194    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
    273 
     195    int MaxCountFlag = 0;
    274196
    275197    //  Regular Expression Processing and Analysis Phase
     
    278200    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    279201
     202
    280203    std::map<std::string, StreamSetBuffer *> propertyStream;
    281204
     
    283206    std::set<re::Name *> UnicodeProperties;
    284207
    285     const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
     208    re::CC* linefeedCC = re::makeCC(0x0A);
     209
     210    re::Seq* seq = re::makeSeq();
     211    seq->push_back(mREs[0]);
     212    seq->push_back(std::move(linefeedCC));
     213
     214
     215    const auto UnicodeSets = re::collectCCs(seq, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    286216    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    287217
     
    295225    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
    296226
    297     StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    298     kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
    299     mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
    300 
    301     StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
    302     kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
    303     mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
    304     StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStreamWithSwizzledAioApproach(
    305             combinedStream, 1 + numOfCharacterClasses, "combined");
    306 
    307     StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
    308     StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    309     kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
    310     mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
    311 
    312 
    313 
    314     StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
    315     Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
    316     mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
    317 
    318     kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
    319     mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
    320     MatchResultsBufs[0] = MatchResults;
    321 
    322     StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    323     if (mREs.size() > 1) {
    324         MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    325         kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
    326         mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
    327     }
    328     StreamSetBuffer * Matches = MergedResults;
    329     if (mMoveMatchesToEOL) {
    330         StreamSetBuffer * OriginalMatches = Matches;
    331         kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    332         Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    333         mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
    334     }
    335 
    336 //    if (MaxCountFlag > 0) {
    337 //        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
    338 //        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
    339 //        StreamSetBuffer * const AllMatches = Matches;
    340 //        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    341 //        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
    342 //    }
    343 
    344 //    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
    345 
    346 };
    347 std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio, bool useSwizzled, bool useByteStream) {
    348 
    349     this->initREs(REs);
    350     auto mGrepDriver = &mPxDriver;
    351 
    352     auto & idb = mGrepDriver->getBuilder();
    353     // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
    354     const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
    355     int MaxCountFlag = 0;
    356 
    357     //  Regular Expression Processing and Analysis Phase
    358     const auto nREs = mREs.size();
    359 
    360     std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    361 
    362 
    363     std::map<std::string, StreamSetBuffer *> propertyStream;
    364 
    365     std::vector<std::string> externalStreamNames;
    366     std::set<re::Name *> UnicodeProperties;
    367 
    368     re::CC* linefeedCC = re::makeCC(0x0A);
    369 
    370     re::Seq* seq = re::makeSeq();
    371     seq->push_back(mREs[0]);
    372     seq->push_back(std::move(linefeedCC));
    373 
    374 
    375     const auto UnicodeSets = re::collectCCs(seq, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    376     StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    377 
    378     mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
    379     mREs[0] = transformCCs(mpx.get(), mREs[0]);
    380     std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    381     auto numOfCharacterClasses = mpx_basis.size();
    382     StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    383 
    384     kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
    385     mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
    386 
    387227    StreamSetBuffer * decompressedCharClasses = nullptr;
    388228    if (useSwizzled) {
    389         if (useAio) {
    390             decompressedCharClasses = this->convertCompressedBitsStreamWithSwizzledAioApproach(CharClasses, numOfCharacterClasses, "combined");
    391         } else {
    392             decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "combined");
    393         }
     229        decompressedCharClasses = this->convertCompressedBitsStreamWithSwizzledAioApproach(CharClasses, numOfCharacterClasses, "combined");
    394230    } else if (useByteStream){
    395231        decompressedCharClasses = this->convertCompressedBitsStreamWithByteStreamAioApproach(CharClasses, numOfCharacterClasses, "combined");
     
    531367    this->generateScanMatchMainFunc(iBuilder);
    532368
     369    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    533370    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    534371
     372    /*
    535373    // GeneratePipeline
    536374    this->generateLoadByteStreamAndBitStream(iBuilder);
     
    558396    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
    559397    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
     398*/
     399    // TODO fix this, generate DecompressedByteStream with LZ4ByteStreamDecompressionKernel
    560400
    561401    StreamSetBuffer * LineBreakStream;
     
    777617
    778618    // GeneratePipeline
    779 //    this->generateLoadByteStream(iBuilder);
    780     this->generateLoadByteStreamAndBitStream(iBuilder);
     619    this->generateLoadByteStream(iBuilder);
     620//    this->generateLoadByteStreamAndBitStream(iBuilder);
    781621
    782622    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
     
    814654}
    815655
    816 
    817 void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
    818     auto & iBuilder = mPxDriver.getBuilder();
    819     this->generateCountOnlyMainFunc(iBuilder);
    820 
    821     // GeneratePipeline
    822     this->generateLoadByteStreamAndBitStream(iBuilder);
    823     this->generateExtractAndDepositMarkers(iBuilder);
    824 
    825     StreamSetBuffer * LineBreakStream;
    826     StreamSetBuffer * Matches;
    827     std::vector<re::RE*> res = {regex};
    828     if (mEnableMultiplexing) {
    829         std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
    830     } else {
    831         StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    832         StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
    833         Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    834         mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    835 
    836         StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    837         Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
    838         mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
    839 
    840         std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
    841     };
    842 
    843     kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
    844     mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
    845     mPxDriver.generatePipelineIR();
    846 
    847     iBuilder->setKernel(matchCountK);
    848     Value * matchedLineCount = iBuilder->getAccumulator("countResult");
    849     matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
    850 
    851     mPxDriver.deallocateBuffers();
    852 
    853     iBuilder->CreateRet(matchedLineCount);
    854 
    855     mPxDriver.finalizeObject();
    856 }
    857 
    858 
    859 void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
    860     auto & iBuilder = mPxDriver.getBuilder();
    861     this->generateCountOnlyMainFunc(iBuilder);
    862 
    863 
    864     // GeneratePipeline
    865     this->generateLoadByteStreamAndBitStream(iBuilder);
    866     this->generateExtractAndDepositMarkers(iBuilder);
    867 
    868 
    869     StreamSetBuffer * LineBreakStream;
    870     StreamSetBuffer * Matches;
    871     std::vector<re::RE*> res = {regex};
    872     if (mEnableMultiplexing) {
    873         std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
    874     } else {
    875         auto swizzle = this->generateSwizzleExtractData(iBuilder);
    876 
    877         StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    878         StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    879 
    880         Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
    881         mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    882 
    883 
    884         // split PDEP into 2 kernel will be a little slower in single thread environment
    885 /*
    886     Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
    887     mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
    888 
    889     Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
    890     mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
    891 */
    892 
    893         StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    894         StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    895 
    896         Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    897         mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    898 
    899         // Produce unswizzled bit streams
    900         StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    901         Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    902         mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
    903 
    904 
    905         std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
    906     };
    907 
    908     kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
    909     mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
    910     mPxDriver.generatePipelineIR();
    911 
    912     iBuilder->setKernel(matchCountK);
    913     Value * matchedLineCount = iBuilder->getAccumulator("countResult");
    914     matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
    915 
    916     mPxDriver.deallocateBuffers();
    917 
    918     iBuilder->CreateRet(matchedLineCount);
    919 
    920     mPxDriver.finalizeObject();
    921 }
    922656
    923657ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.h

    r6132 r6136  
    2020
    2121    LZ4GrepGenerator(bool enableMultiplexing = false);
    22     void generateSwizzledCountOnlyGrepPipeline(re::RE *regex);
    23     void generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather = true);
    2422
    2523    void generateScanMatchGrepPipeline(re::RE* regex);
     
    2725                                                                                   parabix::StreamSetBuffer *decompressedBasisBits);
    2826
    29     void generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs);
    3027    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio = false, bool useSwizzled = true, bool useByteStream = false);
    3128
     
    7168
    7269    parabix::StreamSetBuffer * linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits);
    73     parabix::StreamSetBuffer * linefeedStreamFromCompressedBits();
    7470
    7571
    76     parabix::StreamSetBuffer * convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix);
     72
    7773    parabix::StreamSetBuffer * convertCompressedBitsStreamWithSwizzledAioApproach(
    7874            parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix);
     
    8076            parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix);
    8177
    82 
    83     parabix::StreamSetBuffer * convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix);
    8478};
    8579
  • icGREP/icgrep-devel/icgrep/lz4_grep.cpp

    r6132 r6136  
    4949
    5050static cl::OptionCategory lz4GrepDebugFlags("LZ4 Grep Debug Flags", "lz4d debug options");
    51 static cl::opt<bool> aio("aio", cl::desc("Use All-in-One Approach for LZ4 Decompression"), cl::init(false), cl::cat(lz4GrepDebugFlags));
    5251static cl::opt<bool> parallelDecompression("parallel-decompression", cl::desc("Use parallel Approach for LZ4 Decompression"), cl::init(false), cl::cat(lz4GrepDebugFlags));
    5352static cl::opt<bool> swizzledDecompression("swizzled-decompression", cl::desc("Use swizzle approach for decompression"), cl::init(false), cl::cat(lz4GrepDebugFlags));
     
    8079    re::RE * re_ast = re::RE_Parser::parse(regexString, re::MULTILINE_MODE_FLAG);
    8180    LZ4GrepGenerator g(enableMultiplexing);
    82     if (aio) {
     81    if (countOnly) {
    8382        if (parallelDecompression) {
    8483            g.generateParallelAioPipeline(re_ast, enableGather, enableScatter, minParallelLevel);
     
    106105        uint64_t countResult = main(fileBuffer, lz4Frame.getBlocksStart(), lz4Frame.getBlocksStart() + lz4Frame.getBlocksLength(), lz4Frame.hasBlockChecksum());
    107106        llvm::outs() << countResult << "\n";
    108     } else if (countOnly) {
    109         if (swizzledDecompression) {
    110             g.generateSwizzledCountOnlyGrepPipeline(re_ast);
    111         } else {
    112             g.generateCountOnlyGrepPipeline(re_ast, enableGather);
    113         }
    114 
    115         auto main = g.getCountOnlyGrepMainFunction();
    116         uint64_t countResult = main(fileBuffer, lz4Frame.getBlocksStart(), lz4Frame.getBlocksStart() + lz4Frame.getBlocksLength(), lz4Frame.hasBlockChecksum());
    117         llvm::outs() << countResult << "\n";
    118107    } else {
    119108        g.generateScanMatchGrepPipeline(re_ast);
Note: See TracChangeset for help on using the changeset viewer.