Ignore:
Timestamp:
Apr 18, 2018, 3:30:28 AM (17 months ago)
Author:
xwa163
Message:
  1. Use i1 bit streams instead of i64 number streams for the M0-related streams and the Match Offset-related stream
  2. Improve the performance of lz4_index_builder
File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp

    r5967 r5974  
    4545
    4646           Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1)},
    47            Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
    48            Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
    49            Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)},
    50            Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)}
     47           Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
     48           Binding{iBuilder->getStreamSetTy(1, 1), "M0CountMarker", BoundedRate(0, 1)},
     49           Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", RateEqualTo("byteStream")}
    5150    },
    5251    //Arguments
     
    5857    {
    5958           Binding{iBuilder->getSizeTy(), "blockDataIndex"},
    60            Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
     59           Binding{iBuilder->getInt64Ty(), "m0OutputPos"},
     60           Binding{iBuilder->getInt64Ty(), "compressedSpaceClearPos"}
    6161    }) {
    6262        this->setStride(4 * 1024 * 1024);
     
    6565
    6666    void LZ4IndexBuilderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
     67//        iBuilder->CallPrintInt("IndexBuilder:entry", iBuilder->getSize(0));
    6768
    6869        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
     
    104105
    105106    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
     107//        iBuilder->CallPrintInt("blockEnd", blockEnd);
    106108        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
    107109
    108110        Value * extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
    109111
     112        BasicBlock* extendLiteralLengthCon = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_con");
    110113        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
    111114        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
    112115
    113         iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
     116        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthCon, extendLiteralLengthExit);
     117
     118        iBuilder->SetInsertPoint(extendLiteralLengthCon);
     119
     120        iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(iBuilder->CreateLoad(
     121                iBuilder->getRawInputPointer("byteStream", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)))),
     122                                                            iBuilder->getInt8(0xff)), extendLiteralLengthExit,
     123                                     extendLiteralLengthBody);
     124
    114125
    115126        iBuilder->SetInsertPoint(extendLiteralLengthBody);
    116         Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
     127        Value* newCursorPos2 = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
    117128        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
    118129
     
    120131
    121132        iBuilder->SetInsertPoint(extendLiteralLengthExit);
    122 
    123         PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
    124         phiCursorPosAfterLiteral->addIncoming(newCursorPos, advanceFinishBlock);
     133//        PHINode* newCursorPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     134//        newCursorPos->addIncoming(a, extendLiteralLengthCon);
     135//        newCursorPos->addIncoming(newCursorPos2, advanceFinishBlock);
     136
     137        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
     138        phiCursorPosAfterLiteral->addIncoming(iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), extendLiteralLengthCon);
     139        phiCursorPosAfterLiteral->addIncoming(newCursorPos2, advanceFinishBlock);
    125140        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
    126141
     
    229244                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
    230245        );
    231         this->generateStoreNumberOutput(iBuilder, "m0Start", outputPos);
    232         this->generateStoreNumberOutput(iBuilder, "m0End", outputEndPos);
    233         this->generateStoreNumberOutput(iBuilder, "matchOffset", matchOffset);
     246        iBuilder->setProducedItemCount("M0CountMarker", iBuilder->CreateAdd(iBuilder->getProducedItemCount("M0CountMarker"), iBuilder->getSize(1)));
     247        this->markCircularOutputBitstream(iBuilder, "MatchOffsetMarker", offsetPos);
     248//        iBuilder->CallPrintInt("offsetPos", offsetPos);
     249//        iBuilder->CallPrintInt("matchOffset", matchOffset);
     250
     251
    234252        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
    235253        this->setCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos);
     
    240258    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
    241259        // Constant
    242 
    243         this->clearCircularOutputBitstream(iBuilder, "deletionMarker", blockStart, blockEnd);
     260        Value* clearPos = iBuilder->getScalarField("compressedSpaceClearPos");
     261        // Clearing only [blockStart, blockEnd) is not enough, since there are 4 bytes between blockEnd and nextBlockStart
     262        this->clearCircularOutputBitstream(iBuilder, "deletionMarker", clearPos, blockEnd);
     263        this->clearCircularOutputBitstream(iBuilder, "MatchOffsetMarker", clearPos, blockEnd);
     264        iBuilder->setScalarField("compressedSpaceClearPos", blockEnd);
    244265
    245266        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
     
    302323        // Store final M0 pos to make sure the bit stream will be long enough
    303324        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
    304         this->generateStoreNumberOutput(iBuilder, "m0Start", finalM0OutputPos);
    305         this->generateStoreNumberOutput(iBuilder, "m0End", finalM0OutputPos);
    306         this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64(0));
    307325        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
    308326        // finalM0OutputPos should always be 4MB * n except for the final block
     
    316334    Value * LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value * startPos, Value * maxPos) {
    317335
    318         unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
    319         Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
    320         Type* bitBlockType = iBuilder->getBitBlockType();
    321         Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
     336        Constant* SIZE_64 = iBuilder->getSize(64);
    322337
    323338        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
     
    339354        iBuilder->SetInsertPoint(advanceBodyBlock);
    340355
    341         Value * currentBlockGlobalPos = iBuilder->CreateAnd(phiCurrentPos, ConstantExpr::getNeg(INT64_BIT_BLOCK_WIDTH));
    342         Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
    343 
    344         Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, currentBlockGlobalPos), bitBlockType->getPointerTo());
    345 
    346         Value * currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
    347         currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
     356        Value * currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
     357        Value * currentBlockLocalPos = iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 64));
     358        Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
     359
     360        Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
     361        Value * currentBitValue = iBuilder->CreateLoad(iBuilder->CreateGEP(ptr, currentBlockLocalPos));
     362
     363        currentBitValue = iBuilder->CreateLShr(currentBitValue, currentPosBitBlockOffset);
    348364        currentBitValue = iBuilder->CreateNot(currentBitValue);
    349365
    350366        Value * forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
    351367        Value * newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
    352         newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH);
     368        newOffset = iBuilder->CreateUMin(newOffset, iBuilder->getSize(64));
    353369
    354370        Value * actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
     
    360376        }
    361377
    362         phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, INT64_BIT_BLOCK_WIDTH), iBuilder->GetInsertBlock());
     378        phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, iBuilder->getSize(64)), iBuilder->GetInsertBlock());
    363379        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
    364380        iBuilder->CreateBr(advanceConBlock);
     
    402418                                                             const std::string &bitstreamName,
    403419                                                             llvm::Value *start, llvm::Value *end) {
    404         //TODO currently we assume that start/end pos is not in the same byte
     420        //TODO currently we assume that start/end pos is not in the same byte because of the requirement of the LZ4 format
    405421        Value* SIZE_0 = iBuilder->getSize(0);
    406422        Value* SIZE_8 = iBuilder->getSize(8);
     
    477493        Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
    478494
    479         Value* outputBufferBytes = iBuilder->CreateUDiv(iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth()), SIZE_8);
     495        Value* outputBufferBytes = iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
    480496        Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
    481497
     
    541557
    542558        Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
    543         // TODO bug here when start end in the same byte
    544 //        iBuilder->CallPrintInt("memsetEndByte", memsetEndByte);
    545 //        iBuilder->CallPrintInt("memsetStartByte", memsetStartByte);
    546 //        iBuilder->CallPrintInt("memsetSize1_1", memsetSize);
    547 
    548559
    549560        memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
     
    555566        Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
    556567        Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
    557 //        iBuilder->CallPrintInt("memset1Ptr", iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem));
    558 //        iBuilder->CallPrintInt("memsetSize1", memsetSize1);
    559 
    560 //        iBuilder->CallPrintInt("memset2Ptr", rawOutputPtr);
    561 //        iBuilder->CallPrintInt("memsetSize2", memsetSize2);
     568
    562569        iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), iBuilder->getInt8(0xff), memsetSize1, true);
    563570        iBuilder->CreateMemSet(rawOutputPtr, iBuilder->getInt8(0xff), memsetSize2, true);
     
    567574    }
    568575
     576    void LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder, const string &bitstreamName, Value *pos) {
                // Emits IR that sets a single bit, addressed by `pos`, in the output
                // stream set named `bitstreamName`. The byte index is taken modulo the
                // buffer size in bytes, so positions past the end of the buffer wrap
                // around to its start.
                //
                // iBuilder      - kernel builder used to emit the instructions.
                // bitstreamName - name of the output stream set to modify.
                // pos           - bit position to mark (presumably an absolute item
                //                 position in the stream; confirm against callers).
     577        Value* SIZE_0 = iBuilder->getSize(0);
     578        Value* SIZE_8 = iBuilder->getSize(8);
     579        Value* INT8_1 = iBuilder->getInt8(1);
     580        Type* bytePtrType = iBuilder->getInt8PtrTy();
     581
                // Buffer capacity expressed in bytes: blocks * bits-per-block / 8.
                // NOTE(review): this assumes one bit per item in the named stream; confirm.
     582        Value* outputBufferBytes = iBuilder->getSize(this->getOutputStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
     583
                // Byte that holds the bit, wrapped into the buffer, and the bit's index
                // inside that byte.
     584        Value* bytePos = iBuilder->CreateUDiv(pos, SIZE_8);
     585        bytePos = iBuilder->CreateURem(bytePos, outputBufferBytes);
     586        Value* byteOffset = iBuilder->CreateTrunc(iBuilder->CreateURem(pos, SIZE_8), iBuilder->getInt8Ty());
     587
                // Address the buffer as raw bytes, starting from the pointer returned for item 0.
     588        Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), bytePtrType);
     589        Value* outputTargetPtr = iBuilder->CreateGEP(outputRawPtr, bytePos);
     590
                // Read-modify-write: OR in (1 << byteOffset) so other bits in the byte are kept.
     591        Value* targetValue = iBuilder->CreateLoad(outputTargetPtr);
     592        targetValue = iBuilder->CreateOr(targetValue, iBuilder->CreateShl(INT8_1, byteOffset));
     593        iBuilder->CreateStore(targetValue, outputTargetPtr);
     594
                // `a` (wrapped block index of pos) and `p` (buffer base as a bit-block
                // pointer) are referenced only by the commented-out debug prints below;
                // nothing else in this function uses them.
     595        Value* a = iBuilder->CreateURem(iBuilder->CreateUDiv(pos, iBuilder->getSize(iBuilder->getBitBlockWidth())), iBuilder->getSize(this->getOutputStreamSetBuffer(bitstreamName)->getBufferBlocks()));
     596        Value* p = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), iBuilder->getBitBlockType()->getPointerTo());
     597//        iBuilder->CallPrintInt("--pos", pos);
     598//        iBuilder->CallPrintRegister("aa", iBuilder->CreateLoad(iBuilder->CreateGEP(p, a)));
     599
     600    }
     601
    569602}
Note: See TracChangeset for help on using the changeset viewer.