Changeset 6064


Ignore:
Timestamp:
Jun 4, 2018, 3:20:10 AM (5 months ago)
Author:
xwa163
Message:

Init checkin for LZ4ParallelByteStreamAioKernel and related pipeline

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
7 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6059 r6064  
    103103add_library(UCDlib UCD/CaseFolding.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
    104104add_library(GrepEngine  ${GREP_CORE_SRC} grep/grep_engine.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/streams_merge.cpp kernels/until_n.cpp kernels/UCD_property_kernel.cpp kernels/grapheme_kernel.cpp)
    105 add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp lz4/LZ4GrepEngine.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/lz4/lz4_bitstream_not_kernel.cpp kernels/lz4/lz4_bitstream_match_copy_kernel.cpp kernels/lz4/lz4_fake_stream_generating_kernel.cpp kernels/lz4/lz4_index_builder_new.cpp kernels/lz4/lz4_bytestream_aio.cpp kernels/lz4/lz4_swizzled_aio.cpp)
     105add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp lz4/LZ4GrepEngine.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/lz4/lz4_bitstream_not_kernel.cpp kernels/lz4/lz4_bitstream_match_copy_kernel.cpp kernels/lz4/lz4_fake_stream_generating_kernel.cpp kernels/lz4/lz4_index_builder_new.cpp kernels/lz4/lz4_bytestream_aio.cpp kernels/lz4/lz4_swizzled_aio.cpp kernels/lz4/lz4_parallel_bytestream_aio.cpp)
    106106
    107107
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_bytestream_aio.cpp

    r6061 r6064  
    2121                                   {
    2222                                           Binding{b->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
    23                                            Binding{b->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
     23//                                           Binding{b->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
    2424
    2525                                           // block data
     
    5959        // While in SegmentOrientedKernel, availableItemCount == producedItemCount from the previous kernel
    6060        Value * totalNumber = b->getAvailableItemCount("blockEnd");
    61         Value * totalExtender = b->getAvailableItemCount("extender");
     61//        Value * totalExtender = b->getAvailableItemCount("extender");
    6262
    6363        Value * blockEnd = this->generateLoadInt64NumberInput(b, "blockEnd", blockDataIndex);
     
    6868        Value * blockStart = this->generateLoadInt64NumberInput(b, "blockStart", blockDataIndex);
    6969        BasicBlock * processBlock = b->CreateBasicBlock("processBlock");
    70         b->CreateCondBr(b->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
     70//        b->CreateCondBr(b->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
     71        b->CreateBr(processBlock);
    7172
    7273        b->SetInsertPoint(processBlock);
     
    266267    llvm::Value *LZ4ByteStreamAioKernel::processBlockBoundary(const std::unique_ptr<KernelBuilder> &b, llvm::Value *beginTokenPos,
    267268                                                    llvm::Value *lz4BlockEnd) {
    268 // Constant
     269        // Constant
    269270        ConstantInt* SIZE_0 = b->getSize(0);
    270271        ConstantInt* SIZE_1 = b->getSize(1);
     
    561562        b->SetInsertPoint(literalCopyBody);
    562563        // Always copy fw bits to improve performance
    563         // TODO sometime it will crash because of overflow copy in the end of the buffer, need to add 4 bytes of
    564         //      extra buffer in order to make sure it does not crash.
    565564        b->CreateStore(b->CreateLoad(phiInputPtr), phiOutputPtr);
    566565
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6059 r6064  
    2525#include <kernels/lz4/lz4_index_builder_new.h>
    2626#include <kernels/lz4/lz4_bytestream_aio.h>
     27#include <kernels/lz4/lz4_parallel_bytestream_aio.h>
    2728#include <kernels/lz4/lz4_swizzled_aio.h>
    2829#include <kernels/bitstream_pdep_kernel.h>
     
    388389}
    389390
     391void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     392    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     393    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
     394    sourceK->setInitialArguments({mInputStream, mFileSize});
     395    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
     396}
    390397void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    391398    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     
    458465}
    459466
    460 
     467parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     468    //// Decode Block Information
     469    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
     470    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     471    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     472
     473    //// Generate Helper Markers Extenders
     474    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
     475    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     476    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
     477    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     478
     479    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     480    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
     481    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     482
     483
     484    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(), 1);
     485
     486    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder);
     487    lz4AioK->setInitialArguments({mFileSize});
     488    mPxDriver.makeKernelCall(
     489            lz4AioK,
     490            {
     491                    mCompressedByteStream,
     492                    Extenders,
     493
     494                    // Block Data
     495                    BlockData_IsCompressed,
     496                    BlockData_BlockStart,
     497                    BlockData_BlockEnd
     498            }, {
     499                    decompressionByteStream
     500            });
     501
     502    return decompressionByteStream;
     503
     504}
    461505
    462506StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     
    466510    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    467511
    468     //// Generate Helper Markers Extenders, FX, XF
     512
     513    //// Generate Helper Markers Extenders
     514    /*
    469515    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
    470516    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    471517    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    472518    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     519    */
    473520
    474521
     
    478525
    479526
    480     StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     527    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(), 1);
    481528
    482529    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
     
    486533            {
    487534                    mCompressedByteStream,
    488                     Extenders,
     535//                    Extenders,
    489536
    490537                    // Block Data
     
    590637
    591638int LZ4Generator::get4MbBufferBlocks() {
    592     return 4 * 1024 * 1024 / codegen::BlockSize;
     639    return 4 * 1024 * 1024 / codegen::BlockSize * 4; // TODO: the trailing "* 4" is a workaround for the parallel AIO pipeline
    593640}
    594641
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.h

    r6059 r6064  
    3838    void generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    3939    // Pipeline
     40    virtual void generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    4041    virtual void generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    4142    virtual void generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     43    virtual parabix::StreamSetBuffer * generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    4244    virtual parabix::StreamSetBuffer * generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    4345    virtual parabix::StreamSetBuffer * generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r6062 r6064  
    613613}
    614614
     615void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex) {
     616    auto & iBuilder = mPxDriver.getBuilder();
     617    this->generateCountOnlyMainFunc(iBuilder);
     618
     619    this->generateLoadByteStreamAndBitStream(iBuilder);
     620    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder);
     621
     622
     623    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
     624    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true, "a");
     625//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
     626    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
     627
     628
     629    StreamSetBuffer * LineBreakStream;
     630    StreamSetBuffer * Matches;
     631    std::vector<re::RE*> res = {regex};
     632    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
     633
     634
     635//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     636//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
     637//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
     638
     639    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
     640    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
     641    mPxDriver.generatePipelineIR();
     642
     643    iBuilder->setKernel(matchCountK);
     644    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
     645    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
     646
     647    mPxDriver.deallocateBuffers();
     648
     649    iBuilder->CreateRet(matchedLineCount);
     650
     651    mPxDriver.finalizeObject();
     652}
     653
    615654void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
    616655    auto & iBuilder = mPxDriver.getBuilder();
     
    618657
    619658    // GeneratePipeline
    620     this->generateLoadByteStreamAndBitStream(iBuilder);
     659//    this->generateLoadByteStreamAndBitStream(iBuilder);
     660    this->generateLoadByteStream(iBuilder);
    621661    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
    622662
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.h

    r6062 r6064  
    3939
    4040    void generateAioPipeline(re::RE* regex);
     41    void generateParallelAioPipeline(re::RE* regex);
    4142
    4243    ScanMatchGrepMainFunctionType getScanMatchGrepMainFunction();
  • icGREP/icgrep-devel/icgrep/lz4_grep.cpp

    r6062 r6064  
    5050static cl::OptionCategory lz4GrepDebugFlags("LZ4 Grep Debug Flags", "lz4d debug options");
    5151static cl::opt<bool> aio("aio", cl::desc("Use All-in-One Approach for LZ4 Decompression"), cl::init(false), cl::cat(lz4GrepDebugFlags));
     52static cl::opt<bool> parallelDecompression("parallel-decompression", cl::desc("Use parallel Approach for LZ4 Decompression"), cl::init(false), cl::cat(lz4GrepDebugFlags));
    5253static cl::opt<bool> swizzledDecompression("swizzled-decompression", cl::desc("Use swizzle approach for decompression"), cl::init(false), cl::cat(lz4GrepDebugFlags));
    5354static cl::opt<bool> enableGather("enable-gather", cl::desc("Enable gather intrinsics for bitstream PDEP"), cl::init(false), cl::cat(lz4GrepDebugFlags));
     
    7778    LZ4GrepGenerator g(enableMultiplexing);
    7879    if (aio) {
    79         if (enableMultiplexing) {
     80        if (parallelDecompression) {
     81            g.generateParallelAioPipeline(re_ast);
     82        } else if (enableMultiplexing) {
    8083            g.generateMultiplexingSwizzledAioPipeline2(re_ast);
    8184        } else if (swizzledDecompression) {
Note: See TracChangeset for help on using the changeset viewer.