Ignore:
Timestamp:
Jul 23, 2018, 4:56:33 AM (9 months ago)
Author:
xwa163
Message:
  1. More experiments on lz4 grep
  2. Improve performance of lzparabix grep
Location:
icGREP/icgrep-devel/icgrep/kernels/lz4/aio
Files:
2 added
8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_bitstream_aio.cpp

    r6118 r6132  
    1717                                                 std::vector<unsigned> numsOfBitStreams,
    1818                                                 unsigned blockSize)
    19     : LZ4SequentialAioBaseKernel(b, "LZ4ByteStreamAioKernel", blockSize),
     19    : LZ4SequentialAioBaseKernel(b, "LZ4BitStreamAioKernel", blockSize),
    2020      mNumsOfBitStreams(numsOfBitStreams)
    2121    {
     
    4040
    4141    void LZ4BitStreamAioKernel::doLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
    42                                               llvm::Value *literalLength) {
     42                                              llvm::Value *literalLength, llvm::Value* blockStart) {
    4343        // Constant
    4444        ConstantInt* INT_64_0 = b->getInt64(0);
  • icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_bitstream_aio.h

    r6118 r6132  
    1212    protected:
    1313        virtual void doLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
    14                                    llvm::Value *literalLength) override;
     14                                   llvm::Value *literalLength, llvm::Value* blockStart) override;
    1515        virtual void doMatchCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *matchOffset,
    1616                                 llvm::Value *matchLength) override;
  • icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_bytestream_aio.cpp

    r6111 r6132  
    1515
    1616namespace kernel{
     17    std::string LZ4ByteStreamAioKernel::getCopyByteStreamName() {
     18        return mCopyOtherByteStream ? "targetByteStream" : "byteStream";
     19    }
    1720
    18     LZ4ByteStreamAioKernel::LZ4ByteStreamAioKernel(const std::unique_ptr<kernel::KernelBuilder> &b, unsigned blockSize)
    19             : LZ4SequentialAioBaseKernel(b, "LZ4ByteStreamAioKernel", blockSize) {
     21    LZ4ByteStreamAioKernel::LZ4ByteStreamAioKernel(const std::unique_ptr<kernel::KernelBuilder> &b, bool copyOtherByteStream, unsigned blockSize)
     22            : LZ4SequentialAioBaseKernel(b, "LZ4ByteStreamAioKernel", blockSize),
     23              mCopyOtherByteStream(copyOtherByteStream) {
    2024        mStreamSetOutputs.push_back(Binding{b->getStreamSetTy(1, 8), "outputStream", BoundedRate(0, 1)});
     25        this->addScalar(b->getInt8PtrTy(), "temporaryInputPtr");
     26        if (copyOtherByteStream) {
     27            mStreamSetInputs.push_back(Binding{b->getStreamSetTy(1, 8), "targetByteStream", RateEqualTo("byteStream")});
     28        }
    2129    }
    2230
    2331    void LZ4ByteStreamAioKernel::doLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
    24                                                llvm::Value *literalLength) {
     32                                               llvm::Value *literalLength, llvm::Value* blockStart) {
    2533        unsigned fw = 64;
    2634        Type* INT_FW_PTR = b->getIntNTy(fw)->getPointerTo();
    2735
    28         Value* inputBytePtr = b->getRawInputPointer("byteStream", literalStart);
     36        Value* inputBytePtr = b->getScalarField("temporaryInputPtr");
     37        inputBytePtr = b->CreateGEP(inputBytePtr, b->CreateSub(literalStart, blockStart));
     38
    2939        Value* inputPtr = b->CreatePointerCast(inputBytePtr, INT_FW_PTR);
    3040
     
    3343        Value* outputPtr = b->getRawOutputPointer("outputStream", b->CreateURem(outputPos, outputBufferSize));
    3444        outputPtr = b->CreatePointerCast(outputPtr, INT_FW_PTR);
    35 
    36         // We can always assume that we have enough output buffer based on our output buffer allocation strategy (except in extract only case)
    3745
    3846        BasicBlock* entryBlock = b->GetInsertBlock();
     
    120128    }
    121129
     130    void LZ4ByteStreamAioKernel::initializationMethod(const std::unique_ptr<KernelBuilder> &b) {
     131        b->setScalarField("temporaryInputPtr", b->CreateMalloc(b->getSize(mBlockSize)));
     132    }
     133
     134    void LZ4ByteStreamAioKernel::prepareProcessBlock(const std::unique_ptr<KernelBuilder> &b, llvm::Value* blockStart, llvm::Value* blockEnd) {
     135        Value* rawInputPtr = b->CreatePointerCast(b->getRawInputPointer(this->getCopyByteStreamName(), b->getSize(0)), b->getInt8PtrTy());
     136        Value* inputCapacity = b->getCapacity(this->getCopyByteStreamName());
     137
     138        Value* blockStartRem = b->CreateURem(blockStart, inputCapacity);
     139        Value* remSize = b->CreateSub(inputCapacity, blockStartRem);
     140
     141        Value* blockSize = b->CreateSub(blockEnd, blockStart);
     142
     143        Value* copySize1 = b->CreateUMin(remSize, blockSize);
     144        Value* copySize2 = b->CreateSub(blockSize, copySize1);
     145
     146        Value* temporayInputPtr = b->getScalarField("temporaryInputPtr");
     147
     148        b->CreateMemCpy(temporayInputPtr, b->CreateGEP(rawInputPtr, blockStartRem), copySize1, 1);
     149        b->CreateMemCpy(b->CreateGEP(temporayInputPtr, copySize1), rawInputPtr, copySize2, 1);
     150    }
     151
     152    void LZ4ByteStreamAioKernel::beforeTermination(const std::unique_ptr<KernelBuilder> &b) {
     153        b->CreateFree(b->getScalarField("temporaryInputPtr"));
     154//        b->CallPrintInt("beforeTermination", b->getSize(0));
     155    }
     156
    122157}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_bytestream_aio.h

    r6111 r6132  
    99    class LZ4ByteStreamAioKernel : public LZ4SequentialAioBaseKernel {
    1010    public:
    11         LZ4ByteStreamAioKernel(const std::unique_ptr<kernel::KernelBuilder> &b, unsigned blockSize = 4 * 1024 * 1024);
     11        LZ4ByteStreamAioKernel(const std::unique_ptr<kernel::KernelBuilder> &b, bool copyOtherByteStream = false, unsigned blockSize = 4 * 1024 * 1024);
     12
    1213
    1314    protected:
    1415        virtual void doLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
    15                                    llvm::Value *literalLength) override;
     16                                   llvm::Value *literalLength, llvm::Value* blockStart) override;
    1617        virtual void doMatchCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *matchOffset,
    1718                                 llvm::Value *matchLength) override;
    1819        virtual void setProducedOutputItemCount(const std::unique_ptr<KernelBuilder> &b, llvm::Value* produced) override;
     20
     21        virtual void initializationMethod(const std::unique_ptr<KernelBuilder> &b) override;
     22        virtual void prepareProcessBlock(const std::unique_ptr<KernelBuilder> &b, llvm::Value* blockStart, llvm::Value* blockEnd) override;
     23        virtual void beforeTermination(const std::unique_ptr<KernelBuilder> &b) override;
     24
     25    private:
     26        inline std::string getCopyByteStreamName();
     27        bool mCopyOtherByteStream;
     28
    1929    };
    2030
  • icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_sequential_aio_base.cpp

    r6118 r6132  
    2727                    Binding{b->getStreamSetTy(1, 64), "blockStart", RateEqualTo("isCompressed"), AlwaysConsume()},
    2828                    Binding{b->getStreamSetTy(1, 64), "blockEnd", RateEqualTo("isCompressed"), AlwaysConsume()}
    29 
    3029            },
    3130            //Outputs
     
    4342                                           Binding{b->getInt64Ty(), "outputPos"},
    4443
    45 
    46                                    }){
     44                                           Binding{b->getInt1Ty(), "hasCallInitialization"}
     45
     46
     47                                   }),
     48             mBlockSize(blockSize) {
    4749        this->setStride(blockSize);
    4850        addAttribute(MustExplicitlyTerminate());
     
    5153    // ---- Kernel Methods
    5254    void LZ4SequentialAioBaseKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &b) {
     55        Value* hasCallInitialization = b->getScalarField("hasCallInitialization");
     56
     57        BasicBlock* initializationBlock = b->CreateBasicBlock("initializationBlock");
     58        BasicBlock* entryBlock = b->CreateBasicBlock("entryBlock");
    5359        BasicBlock* exitBlock = b->CreateBasicBlock("exitBlock");
     60
     61        b->CreateLikelyCondBr(hasCallInitialization, entryBlock, initializationBlock);
     62
     63        // ---- initializationBlock
     64        b->SetInsertPoint(initializationBlock);
     65        b->setScalarField("hasCallInitialization", b->getInt1(true));
     66        this->initializationMethod(b);
     67        b->CreateBr(entryBlock);
     68
     69        // ---- entryBlock
     70        b->SetInsertPoint(entryBlock);
    5471        BasicBlock* blockEndConBlock = b->CreateBasicBlock("blockEndConBlock");
    5572
     
    7289
    7390        b->SetInsertPoint(processBlock);
    74 
    7591        //TODO handle uncompressed block
     92        this->prepareProcessBlock(b, blockStart, blockEnd);
     93
    7694        this->processCompressedLz4Block(b, blockStart, blockEnd);
     95
    7796        this->storePendingOutput(b);
    7897
     
    87106        b->CreateBr(exitBlock);
    88107
     108        // ---- exitBlock
    89109        b->SetInsertPoint(exitBlock);
     110
     111        BasicBlock* beforeTerminationBlock = b->CreateBasicBlock("beforeTerminationBlock");
     112        BasicBlock* terminationBlock = b->CreateBasicBlock("terminationBlock");
     113
     114        b->CreateUnlikelyCondBr(b->getTerminationSignal(), beforeTerminationBlock, terminationBlock);
     115
     116        // ---- beforeTerminationBlock
     117        b->SetInsertPoint(beforeTerminationBlock);
     118        this->beforeTermination(b);
     119        b->CreateBr(terminationBlock);
     120
     121        // ---- terminationBlock
     122        b->SetInsertPoint(terminationBlock);
    90123    }
    91124
     
    116149        b->SetInsertPoint(processBody);
    117150        /*
    118         auto accelerationRet = this->doAcceleration(b, phiCursorValue, lz4BlockEnd);
     151        auto accelerationRet = this->doAcceleration(b, phiCursorValue, lz4BlockStart, lz4BlockEnd);
    119152        Value* tokenMarkers = accelerationRet.first.first;
    120153
     
    125158        nextTokenGlobalPos = this->processLz4Sequence(b, nextTokenGlobalPos, lz4BlockEnd);
    126159        */
    127         Value* nextTokenGlobalPos = this->processLz4Sequence(b, phiCursorValue, lz4BlockEnd);
     160        Value* nextTokenGlobalPos = this->processLz4Sequence(b, phiCursorValue, lz4BlockStart, lz4BlockEnd);
    128161        phiCursorValue->addIncoming(nextTokenGlobalPos, b->GetInsertBlock());
    129162        b->CreateBr(processCon);
     
    133166
    134167    std::pair<std::pair<llvm::Value *, llvm::Value *>, llvm::Value *>
    135     LZ4SequentialAioBaseKernel::doAcceleration(const std::unique_ptr<KernelBuilder> &b, llvm::Value *beginTokenPos,
    136                                      llvm::Value *blockEnd) {
     168    LZ4SequentialAioBaseKernel::doAcceleration(
     169            const std::unique_ptr<KernelBuilder> &b,
     170            llvm::Value *beginTokenPos,
     171            llvm::Value *blockStart,
     172            llvm::Value *blockEnd) {
    137173        BasicBlock* entryBlock = b->GetInsertBlock();
    138174
     
    229265        // TODO all of the literal data here will always be in the same 64-bit literal block, it may be better if we provide
    230266        //      this information to the literal copy method, especially when we are working with swizzled form
    231         this->doAccelerationLiteralCopy(b, literalStartGlobalPos, literalLength);
     267        this->doAccelerationLiteralCopy(b, literalStartGlobalPos, literalLength, blockStart);
    232268        this->doAccelerationMatchCopy(b, matchOffset, matchLength);
    233269
     
    246282    }
    247283
    248     llvm::Value *LZ4SequentialAioBaseKernel::processLz4Sequence(const std::unique_ptr<KernelBuilder> &b,
    249                                                       llvm::Value *beginTokenPos,
    250                                                       llvm::Value *lz4BlockEnd) {
     284    llvm::Value *LZ4SequentialAioBaseKernel::processLz4Sequence(
     285            const std::unique_ptr<KernelBuilder> &b,
     286            llvm::Value *beginTokenPos,
     287            llvm::Value *lz4BlockStart,
     288            llvm::Value *lz4BlockEnd) {
    251289        // Constant
    252290        ConstantInt* SIZE_0 = b->getSize(0);
     
    307345
    308346        // This literal copy will always cross 64 bits literal boundary
    309         this->doLiteralCopy(b, literalStartPos, literalLength);
     347        this->doLiteralCopy(b, literalStartPos, literalLength, lz4BlockStart);
    310348        BasicBlock* extendLiteralEndFinal = b->GetInsertBlock();
    311349
  • icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_sequential_aio_base.h

    r6118 r6132  
    2828    // ---- Constant
    2929    const static unsigned int ACCELERATION_WIDTH = 64;
     30    const unsigned mBlockSize;
    3031
    3132    // ---- Kernel Methods
     
    3637                                   llvm::Value *lz4BlockEnd);
    3738
    38     std::pair<std::pair<llvm::Value *, llvm::Value *>, llvm::Value *>
    39     doAcceleration(const std::unique_ptr<KernelBuilder> &b, llvm::Value *beginTokenPos,
    40                    llvm::Value *blockEnd);
     39    std::pair<std::pair<llvm::Value *, llvm::Value *>, llvm::Value *> doAcceleration(
     40            const std::unique_ptr<KernelBuilder> &b,
     41            llvm::Value *beginTokenPos,
     42            llvm::Value *blockStart,
     43            llvm::Value *blockEnd);
    4144
    4245
    43     virtual llvm::Value *processLz4Sequence(const std::unique_ptr<KernelBuilder> &b,
    44                                     llvm::Value *beginTokenPos, llvm::Value *lz4BlockEnd);
     46    virtual llvm::Value *processLz4Sequence(
     47            const std::unique_ptr<KernelBuilder> &b,
     48            llvm::Value *beginTokenPos,
     49            llvm::Value *lz4BlockStart,
     50            llvm::Value *lz4BlockEnd
     51    );
    4552
    4653    std::pair<llvm::Value*, llvm::Value*> parseMatchInfo(const std::unique_ptr<KernelBuilder> &b, llvm::Value* matchOffsetBeginPos, llvm::Value* tokenValue);
     
    8693    // ---- Methods To Be Override
    8794
     95    virtual void initializationMethod(const std::unique_ptr<KernelBuilder> &b){};
     96    virtual void prepareProcessBlock(const std::unique_ptr<KernelBuilder> &b, llvm::Value* blockStart, llvm::Value* blockEnd){};
     97    virtual void beforeTermination(const std::unique_ptr<KernelBuilder> &b){};
     98
    8899
    89100    virtual void doLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
    90                                llvm::Value *literalLength) = 0;
     101                               llvm::Value *literalLength, llvm::Value* blockStart) = 0;
    91102
    92103    virtual void doMatchCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *matchOffset,
     
    98109    virtual void prepareAcceleration(const std::unique_ptr<KernelBuilder> &b, llvm::Value* beginTokenPos) {};
    99110    virtual void doAccelerationLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
    100                                            llvm::Value *literalLength) {this->doLiteralCopy(b, literalStart, literalLength);}
     111                                           llvm::Value *literalLength, llvm::Value* blockStart) {this->doLiteralCopy(b, literalStart, literalLength, blockStart);}
    101112    virtual void doAccelerationMatchCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *matchOffset,
    102113                                         llvm::Value *matchLength) {this->doMatchCopy(b, matchOffset, matchLength);}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_swizzled_aio.cpp

    r6111 r6132  
    5757
    5858    void LZ4SwizzledAioKernel::doAccelerationLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
    59                                            llvm::Value *literalLength) {
     59                                           llvm::Value *literalLength, llvm::Value* blockStart) {
    6060//        this->handleAccelerationLiteralCopy(b, literalStart, literalLength, inputValuesVector);
    6161
     
    512512
    513513    void LZ4SwizzledAioKernel::doLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
    514                                              llvm::Value *literalLength) {
     514                                             llvm::Value *literalLength, llvm::Value* blockStart) {
    515515        Value* SIZE_64  = b->getSize(64);
    516516        Value* SIZE_0 = b->getSize(0);
  • icGREP/icgrep-devel/icgrep/kernels/lz4/aio/lz4_swizzled_aio.h

    r6111 r6132  
    5555
    5656        virtual void doLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
    57                                    llvm::Value *literalLength);
     57                                   llvm::Value *literalLength, llvm::Value* blockStart) override;
    5858        virtual void doMatchCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *matchOffset,
    5959                                 llvm::Value *matchLength);
     
    6363        virtual void prepareAcceleration(const std::unique_ptr<KernelBuilder> &b, llvm::Value* beginTokenPos) override;
    6464        virtual void doAccelerationLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *literalStart,
    65                                                              llvm::Value *literalLength) override;
     65                                                             llvm::Value *literalLength, llvm::Value* blockStart) override;
    6666        virtual void doAccelerationMatchCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *matchOffset,
    6767                                                           llvm::Value *matchLength) override;
Note: See TracChangeset for help on using the changeset viewer.