Changeset 6111


Ignore:
Timestamp:
Jun 24, 2018, 1:24:36 AM (3 months ago)
Author:
xwa163
Message:
  1. Clean up LZ4 AIO-related kernels
  2. Improve LZ4ParallelByteStreamAIOKernel
  3. Implement simd_cttz
Location:
icGREP/icgrep-devel/icgrep
Files:
11 added
12 deleted
10 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6093 r6111  
    103103add_library(UCDlib UCD/CaseFolding.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
    104104add_library(GrepEngine  ${GREP_CORE_SRC} grep/grep_engine.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/streams_merge.cpp kernels/until_n.cpp kernels/UCD_property_kernel.cpp kernels/grapheme_kernel.cpp)
    105 add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp lz4/LZ4GrepEngine.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/lz4/lz4_bitstream_not_kernel.cpp kernels/lz4/lz4_bitstream_match_copy_kernel.cpp kernels/lz4/lz4_fake_stream_generating_kernel.cpp kernels/lz4/lz4_index_builder_new.cpp kernels/lz4/lz4_bytestream_aio.cpp kernels/lz4/lz4_swizzled_aio.cpp kernels/lz4/lz4_parallel_bytestream_aio.cpp)
     105add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/lz4/lz4_bitstream_not_kernel.cpp kernels/lz4/lz4_bitstream_match_copy_kernel.cpp kernels/fake_stream_generating_kernel.cpp kernels/lz4/aio/lz4_bytestream_aio.cpp kernels/lz4/aio/lz4_swizzled_aio.cpp kernels/lz4/aio/lz4_parallel_bytestream_aio.cpp kernels/lz4/aio/lz4_sequential_aio_base.cpp kernels/lz4/aio/lz4_sequential_aio_base.h)
    106106
    107107
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r6110 r6111  
    427427    } else {
    428428        return CreatePopcount(fwCast(fw, a));
     429    }
     430}
     431
     432Value * IDISA_Builder::simd_cttz(unsigned fw, Value * a) {
     433    if (fw == 1) {
     434        return simd_not(a);
     435    } else {
     436        Value* v = simd_sub(fw, a, simd_fill(fw, getIntN(fw, 1)));
     437        v = simd_or(v, a);
     438        v = simd_xor(v, a);
     439        v = simd_popcount(fw, v);
     440        return v;
    429441    }
    430442}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r6109 r6111  
    137137   
    138138    virtual llvm::Value * simd_popcount(unsigned fw, llvm::Value * a);
     139    virtual llvm::Value * simd_cttz(unsigned fw, llvm::Value * a);
    139140
    140141    virtual llvm::Value * simd_bitreverse(unsigned fw, llvm::Value * a);
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.cpp

    r6081 r6111  
    1515namespace kernel{
    1616
    17 LZ4BlockDecoderNewKernel::LZ4BlockDecoderNewKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, std::string&& kernelName)
     17LZ4BlockDecoderKernel::LZ4BlockDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, std::string&& kernelName)
    1818: SegmentOrientedKernel(std::string(kernelName),
    1919// Inputs
     
    4646}
    4747
    48 void LZ4BlockDecoderNewKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) {
     48void LZ4BlockDecoderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) {
    4949
    5050    Constant* INT64_0 = b->getInt64(0);
     
    161161}
    162162
    163 void LZ4BlockDecoderNewKernel::appendOutput(const std::unique_ptr<KernelBuilder> & iBuilder, Value * const isCompressed, Value * const blockStart, Value * const blockEnd) {
     163void LZ4BlockDecoderKernel::appendOutput(const std::unique_ptr<KernelBuilder> & iBuilder, Value * const isCompressed, Value * const blockStart, Value * const blockEnd) {
    164164    Value * const offset = iBuilder->getProducedItemCount("isCompressed");
    165165    generateStoreNumberOutput(iBuilder, "isCompressed", offset, iBuilder->CreateZExt(isCompressed, iBuilder->getInt8Ty()));
     
    169169}
    170170
    171 Value* LZ4BlockDecoderNewKernel::generateLoadInput(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* offset) {
     171Value* LZ4BlockDecoderKernel::generateLoadInput(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* offset) {
    172172    return iBuilder->CreateLoad(iBuilder->getRawInputPointer("byteStream", offset));
    173173}
    174174
    175 void LZ4BlockDecoderNewKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder, const string &outputBufferName, Value * offset, Value *value) {
     175void LZ4BlockDecoderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder, const string &outputBufferName, Value * offset, Value *value) {
    176176    iBuilder->CreateStore(value, iBuilder->getRawOutputPointer(outputBufferName, offset));
    177177}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.h

    r6081 r6111  
    2020namespace kernel {
    2121
    22 class LZ4BlockDecoderNewKernel : public SegmentOrientedKernel {
     22class LZ4BlockDecoderKernel : public SegmentOrientedKernel {
    2323public:
    24     LZ4BlockDecoderNewKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, std::string&& kernelName = "LZ4BlockDecoderKernel");
     24    LZ4BlockDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, std::string&& kernelName = "LZ4BlockDecoderKernel");
    2525protected:
    2626    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) override;
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6089 r6111  
    2323#include <kernels/lz4/lz4_block_decoder.h>
    2424#include <kernels/lz4/lz4_index_builder.h>
    25 #include <kernels/lz4/lz4_index_builder_new.h>
    26 #include <kernels/lz4/lz4_bytestream_aio.h>
    27 #include <kernels/lz4/lz4_parallel_bytestream_aio.h>
    28 #include <kernels/lz4/lz4_swizzled_aio.h>
     25#include <kernels/lz4/aio/lz4_bytestream_aio.h>
     26#include <kernels/lz4/aio/lz4_parallel_bytestream_aio.h>
     27#include <kernels/lz4/aio/lz4_swizzled_aio.h>
    2928#include <kernels/bitstream_pdep_kernel.h>
    3029#include <kernels/lz4/lz4_bitstream_not_kernel.h>
     
    3635using namespace kernel;
    3736
    38 LZ4Generator::LZ4Generator():mPxDriver("lz4d") {
     37LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) {
    3938    mCompressionMarker = NULL;
    4039}
     
    6564
    6665
    67     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     66    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    6867    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    6968    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     
    7978    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
    8079
    81     Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderNewKernel>(iBuilder);
     80    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
    8281    Lz4IndexBuilderK->setInitialArguments({mFileSize});
    8382    mPxDriver.makeKernelCall(
     
    402401    sourceK->setInitialArguments({mInputStream, mFileSize});
    403402    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
    404     Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian);
     403    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::LittleEndian);
    405404    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
    406405}
     
    413412
    414413    //// Generate Helper Markers Extenders, FX, XF
    415     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
    416     mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    417     Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    418     mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    419 
    420 
    421     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     414//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
     415//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
     416//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
     417//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     418
     419
     420    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    422421    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    423422    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     
    442441            {
    443442                    mCompressedByteStream,
    444                     Extenders,
     443
     444//                    Extenders,
    445445
    446446                    // Block Data
     
    465465}
    466466
    467 parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter) {
     467parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) {
    468468    //// Decode Block Information
    469469    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
     
    477477//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    478478
    479     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     479    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    480480    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    481481    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     
    484484    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
    485485
    486     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder, enableGather, enableScatter);
     486    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
    487487    lz4AioK->setInitialArguments({mFileSize});
    488488    mPxDriver.makeKernelCall(
     
    490490            {
    491491                    mCompressedByteStream,
     492
     493//                    Extenders,
    492494
    493495                    // Block Data
     
    511513
    512514    //// Generate Helper Markers Extenders
    513     /*
    514     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
    515     mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    516     Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    517     mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    518     */
    519 
    520 
    521     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     515//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
     516//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
     517//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
     518//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     519
     520
     521    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    522522    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    523523    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     
    558558
    559559
    560     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     560    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    561561    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    562562    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     
    636636
    637637int LZ4Generator::get4MbBufferBlocks() {
    638     return 4 * 1024 * 1024 / codegen::BlockSize;
     638    return mLz4BlockSize / codegen::BlockSize;
    639639}
    640640
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.h

    r6081 r6111  
    4141    virtual void generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    4242    virtual void generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    43     virtual parabix::StreamSetBuffer * generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter);
     43    virtual parabix::StreamSetBuffer * generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel);
    4444    virtual parabix::StreamSetBuffer * generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    4545    virtual parabix::StreamSetBuffer * generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     
    7676    // M0CountMarker will not contain anything, it will only be used to pass producedItemCount and manage processedItemCount between different kernel
    7777    parabix::StreamSetBuffer * mM0Marker;
     78
     79    unsigned mLz4BlockSize;
    7880};
    7981
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r6089 r6111  
    66#include <llvm/Support/PrettyStackTrace.h>
    77
    8 #include <cc/alphabet.h>
    98#include <cc/cc_compiler.h>
    109
     
    2322#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
    2423#include <kernels/lz4/lz4_bitstream_not_kernel.h>
    25 #include <kernels/lz4/lz4_fake_stream_generating_kernel.h>
     24#include <kernels/fake_stream_generating_kernel.h>
    2625#include <kernels/bitstream_pdep_kernel.h>
    2726#include <kernels/bitstream_gather_pdep_kernel.h>
     
    5352#include <llvm/Support/Debug.h>
    5453#include <kernels/lz4/lz4_block_decoder.h>
    55 #include <kernels/lz4/lz4_swizzled_aio.h>
     54#include <kernels/lz4/aio/lz4_swizzled_aio.h>
    5655
    5756
     
    124123
    125124
    126     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     125    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    127126    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    128127    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     
    281280
    282281    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
    283     Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
     282    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
    284283    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
    285284
     
    372371
    373372    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
    374     Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
     373    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
    375374    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
    376375
     
    523522}
    524523
    525 void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE* regex) {
    526     auto & iBuilder = mPxDriver.getBuilder();
    527     this->generateMainFunc(iBuilder);
    528 
    529     // GeneratePipeline
    530     this->generateLoadByteStreamAndBitStream(iBuilder);
    531 
    532     std::vector<re::RE*> res = {regex};
    533     this->generateMultiplexingCompressedBitStream(res);
    534 
    535     mPxDriver.generatePipelineIR();
    536     mPxDriver.deallocateBuffers();
    537 
    538     iBuilder->CreateRetVoid();
    539 
    540     mPxDriver.finalizeObject();
    541 }
    542 
    543 void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline2(re::RE* regex) {
     524void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE *regex) {
    544525    auto & iBuilder = mPxDriver.getBuilder();
    545526    this->generateCountOnlyMainFunc(iBuilder);
     
    607588}
    608589
    609 void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter) {
     590void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter, int minParallelLevel) {
    610591    auto & iBuilder = mPxDriver.getBuilder();
    611592    this->generateCountOnlyMainFunc(iBuilder);
    612593
    613594    this->generateLoadByteStream(iBuilder);
    614     parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter);
     595    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter, minParallelLevel);
    615596
    616597
    617598    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
    618     Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
    619 //    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, cc::BitNumbering::BigEndian, "a");
     599    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::LittleEndian, /*aligned = */ true, "a");
     600//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, cc::BitNumbering::LittleEndian, "a");
    620601    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
    621602
     
    651632
    652633    // GeneratePipeline
     634    this->generateLoadByteStream(iBuilder);
    653635//    this->generateLoadByteStreamAndBitStream(iBuilder);
    654     this->generateLoadByteStream(iBuilder);
     636
    655637    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
    656638
    657639
    658640    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
    659     Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
     641    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::LittleEndian, /*aligned = */ true, "a");
    660642//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
    661643    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.h

    r6081 r6111  
    3333    void invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum);
    3434
    35     void generateMultiplexingSwizzledAioPipeline(re::RE* regex);
    36     void generateMultiplexingSwizzledAioPipeline2(re::RE* regex);
     35    void generateMultiplexingSwizzledAioPipeline(re::RE *regex);
    3736
    3837    void generateSwizzledAioPipeline(re::RE* regex);
    3938
    4039    void generateAioPipeline(re::RE* regex);
    41     void generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter);
     40    void generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter, int minParallelLevel);
    4241
    4342    ScanMatchGrepMainFunctionType getScanMatchGrepMainFunction();
  • icGREP/icgrep-devel/icgrep/lz4_grep.cpp

    r6081 r6111  
    5454static cl::opt<bool> enableGather("enable-gather", cl::desc("Enable gather intrinsics"), cl::init(false), cl::cat(lz4GrepDebugFlags));
    5555static cl::opt<bool> enableScatter("enable-scatter", cl::desc("Enable scatter intrinsics"), cl::init(false), cl::cat(lz4GrepDebugFlags));
     56static cl::opt<int> minParallelLevel("min-parallel-level", cl::desc("Mininum parallel level"), cl::init(1), cl::cat(lz4GrepDebugFlags));
     57
    5658
    5759
     
    8082    if (aio) {
    8183        if (parallelDecompression) {
    82             g.generateParallelAioPipeline(re_ast, enableGather, enableScatter);
     84            g.generateParallelAioPipeline(re_ast, enableGather, enableScatter, minParallelLevel);
    8385        } else if (enableMultiplexing) {
    84             g.generateMultiplexingSwizzledAioPipeline2(re_ast);
     86            g.generateMultiplexingSwizzledAioPipeline(re_ast);
    8587        } else if (swizzledDecompression) {
    8688            g.generateSwizzledAioPipeline(re_ast);
Note: See TracChangeset for help on using the changeset viewer.