Ignore:
Timestamp:
Mar 13, 2018, 12:08:22 PM (18 months ago)
Author:
xwa163
Message:
  1. Fix some bugs in the match copy kernel
  2. Remove some legacy code from the match copy kernel and the sequential kernel
Location:
icGREP/icgrep-devel/icgrep/kernels/lz4
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_extract_e_m0.cpp

    r5885 r5905  
    66#include <iostream>
    77
    8 //#define APPLY_64PACK_ACCELERATION
     8#define APPLY_64PACK_ACCELERATION
    99// TODO May be we can change it to 256 PACK Acceleration based on SIMD instruction
    1010
     
    687687        ) {
    688688    this->initBufferCursor(iBuilder, {"extender"});
    689     this->configIndexBits(iBuilder, inputIndexMap);
    690 //    this->configOutputBufferToBeClear({{"byteStream", "e1Marker"}});
    691 //    setNoTerminateAttribute(true);
    692689}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_match_copy_kernel.cpp

    r5895 r5905  
    66#include <kernels/streamset.h>
    77#include <toolchain/toolchain.h>
    8 
    9 #define OUTPUT_BIT_STREAM_NAME "outputStream"
     8#include <llvm/Support/raw_ostream.h>
     9
     10#define OUTPUT_STREAM_NAME "outputStream"
    1011
    1112using namespace llvm;
     
    2021    Value *previousProcessed = iBuilder->getProcessedItemCount("decompressedStream");
    2122
    22 //    BasicBlock *entryBlock = iBuilder->GetInsertBlock();
    2323    Value *inputBasePtr = iBuilder->getInputStreamBlockPtr("decompressedStream", SIZE_ZERO);
    2424
    25     Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO);
     25    Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_STREAM_NAME, SIZE_ZERO);
    2626    Value *itemsToDo = mAvailableItemCount[0];
    27     Value *copySize = iBuilder->CreateUMin(
    28             itemsToDo,
    29             iBuilder->CreateMul(outputBlocks, SIZE_BIT_BLOCK_WIDTH)
    30     );
    31 //    iBuilder->CallPrintInt("itemsToDo", itemsToDo);
    32 //    iBuilder->CallPrintInt("itemsToDo1", mAvailableItemCount[1]);
    33 //    iBuilder->CallPrintInt("itemsToDo2", mAvailableItemCount[2]);
    34 //    iBuilder->CallPrintInt("itemsToDo3", mAvailableItemCount[3]);
    35 //    iBuilder->CallPrintInt("copySize", copySize);
     27    Value *copySize = iBuilder->CreateMul(outputBlocks, SIZE_BIT_BLOCK_WIDTH);
     28    Value* actualCopySize = iBuilder->CreateUMin(itemsToDo, copySize);
    3629
    3730    iBuilder->CreateMemCpy(
     
    3932            inputBasePtr,
    4033            copySize,
    41             1 // Not align guaranteed in final block
    42     );
    43 //    iBuilder->CallPrintInt("outputCpyPtr", outputBasePtr);
    44 //    iBuilder->CallPrintInt("outputBlocks", outputBlocks);
    45     Value *newProcessed = iBuilder->CreateAdd(previousProcessed, copySize);
     34            iBuilder->getBitBlockWidth()
     35    ); //It will be ok to always copy by full block
     36
     37    Value *newProcessed = iBuilder->CreateAdd(previousProcessed, actualCopySize);
    4638    iBuilder->setProcessedItemCount("decompressedStream", newProcessed);
    47     iBuilder->setProducedItemCount(OUTPUT_BIT_STREAM_NAME, newProcessed);
     39    iBuilder->setProducedItemCount(OUTPUT_STREAM_NAME, newProcessed);
    4840
    4941}
     
    6961    Value *lastDepositPosition = iBuilder->CreateAdd(lastM0, SIZE_ONE);
    7062
    71     // TODO maybe we can not use mIsFinal here
    7263    Value *currentMaxBlock = iBuilder->CreateSelect(
    7364            this->mIsFinalBlock,
     
    7869    // Produced Item Count will always be full bitblock except for final block
    7970    Value *previousProducedBlocks = iBuilder->CreateUDiv(
    80             iBuilder->getProducedItemCount(OUTPUT_BIT_STREAM_NAME),
     71            iBuilder->getProducedItemCount(OUTPUT_STREAM_NAME),
    8172            SIZE_BIT_BLOCK_WIDTH
    8273    );
     
    9485    Value *itemsToDo = mAvailableItemCount[0];
    9586
    96     Value *isFinalBlock =
    97             iBuilder->CreateOr(
    98                     iBuilder->CreateICmpULT(itemsToDo, iBuilder->CreateMul(numOfStrides, SIZE_BIT_BLOCK_WIDTH)),
    99                     iBuilder->CreateICmpEQ(itemsToDo, iBuilder->getSize(0))
    100             );
    101 
    102     this->mIsFinalBlock = isFinalBlock;
     87
    10388//    iBuilder->CallPrintInt("isFinalBlock", isFinalBlock);
    104     iBuilder->setTerminationSignal(isFinalBlock);
    105 
    106 
    107 
    108 
    109     Value *previousProducedItemCount = iBuilder->getProducedItemCount(OUTPUT_BIT_STREAM_NAME);
     89
     90//    iBuilder->CallPrintInt("matchCopy:isFinalBlock", isFinalBlock);
     91
     92    Value *previousProducedItemCount = iBuilder->getProducedItemCount(OUTPUT_STREAM_NAME);
    11093
    11194
    11295    // Space Calculation
    11396    Value *outputBufferBlocks = iBuilder->getSize(
    114             this->getAnyStreamSetBuffer(OUTPUT_BIT_STREAM_NAME)->getBufferBlocks());
    115     // TODO need to take previous produced size into account
    116 
    117 
     97            this->getAnyStreamSetBuffer(OUTPUT_STREAM_NAME)->getBufferBlocks());
     98
     99    Value *outputCurrentPtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_STREAM_NAME, SIZE_ZERO); // [8 x <4 x i64>]*
    118100    Value *outputRawBeginPtr = iBuilder->CreatePointerCast(
    119             iBuilder->getRawOutputPointer(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO),
    120             iBuilder->getBitBlockType()->getPointerTo());
    121     Value *outputCurrentPtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO);
     101            iBuilder->getRawOutputPointer(OUTPUT_STREAM_NAME, SIZE_ZERO), outputCurrentPtr->getType());
    122102    Value *producedOffset = iBuilder->CreatePtrDiff(outputCurrentPtr, outputRawBeginPtr);
    123103    Value *remainSpace = iBuilder->CreateSub(outputBufferBlocks, producedOffset);
     
    134114    Value *outputBlocks = iBuilder->CreateUMin(writableBlocks, numOfStrides);
    135115    // outputBlock === min(writableBlocks, numOfStrides, (matchOffsetPosition + matchLength - producedItemCount) / bitBlockWidth )
    136 
    137     outputBlocks = iBuilder->CreateUMin(outputBlocks, this->getMaximumMatchCopyBlock(iBuilder));
    138 
     116//    iBuilder->CallPrintInt("outputBlocks1", outputBlocks);
     117
     118//    outputBlocks = iBuilder->CreateUMin(outputBlocks, this->getMaximumMatchCopyBlock(iBuilder)); //TODO need to handle final block, otherwise it may be deadloop when there is not match copy in final block
     119//    iBuilder->CallPrintInt("outputBlocks2", outputBlocks);
    139120
    140121//    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    141122
     123
     124    Value *isFinalBlock =
     125            iBuilder->CreateOr(
     126                    iBuilder->CreateICmpULT(itemsToDo, iBuilder->CreateMul(outputBlocks, SIZE_BIT_BLOCK_WIDTH)),
     127                    iBuilder->CreateICmpEQ(itemsToDo, iBuilder->getSize(0))
     128            );
     129
     130    this->mIsFinalBlock = isFinalBlock;
     131    iBuilder->setTerminationSignal(isFinalBlock);
    142132    // Output Copy
    143133    this->generateOutputCopy(iBuilder, outputBlocks);
    144 //    return;
    145 
    146     Value *newProducedItemCount = iBuilder->getProducedItemCount(OUTPUT_BIT_STREAM_NAME);
     134
     135    Value *newProducedItemCount = iBuilder->getProducedItemCount(OUTPUT_STREAM_NAME);
    147136
    148137    BasicBlock *copyEndBlock = iBuilder->CreateBasicBlock("copyEnd");
     
    150139    iBuilder->SetInsertPoint(copyEndBlock);
    151140
    152     // TODO match Copy
     141    // Match Copy
    153142    BasicBlock *exitBlock = iBuilder->CreateBasicBlock("exit_block");
    154143
     
    238227    iBuilder->SetInsertPoint(matchCopyBodyBlock);
    239228    Value* matchCopyFromPos = iBuilder->CreateSub(phiMatchPos, phiMatchOffset);
    240     Value* rawOutputBasePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO), iBuilder->getInt8PtrTy());
    241 //    iBuilder->CallPrintInt("rawOutputBasePtr", rawOutputBasePtr);
    242 //    iBuilder->CallPrintInt("rawOutputBasePtr1", iBuilder->CreateGEP(
    243 //            rawOutputBasePtr,
    244 //            iBuilder->CreateURem(matchCopyFromPos, iBuilder->CreateMul(outputBufferBlocks, SIZE_BIT_BLOCK_WIDTH))
    245 //    ));
    246     Value* matchCopyFromValue = iBuilder->CreateLoad(
    247             iBuilder->CreateGEP(
    248                     rawOutputBasePtr,
    249                     iBuilder->CreateURem(matchCopyFromPos, iBuilder->CreateMul(outputBufferBlocks, SIZE_BIT_BLOCK_WIDTH))
    250             ));
     229    Value* rawOutputBasePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(OUTPUT_STREAM_NAME, SIZE_ZERO), iBuilder->getInt8PtrTy());
     230
     231    Value* outputBufferSize = iBuilder->CreateMul(outputBufferBlocks, SIZE_BIT_BLOCK_WIDTH);
     232    Value* matchCopyFromOffset = iBuilder->CreateURem(matchCopyFromPos, outputBufferSize);
     233    Value* matchCopyFromPtr = iBuilder->CreateGEP(rawOutputBasePtr, matchCopyFromOffset);
    251234
    252235    // Output is guranteed to be full bit block except for final block
    253     Value* outputBlockBasePtr = iBuilder->CreatePointerCast(iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO), iBuilder->getInt8PtrTy());
     236    Value* outputBlockBasePtr = iBuilder->CreatePointerCast(iBuilder->getOutputStreamBlockPtr(OUTPUT_STREAM_NAME, SIZE_ZERO), iBuilder->getInt8PtrTy());
    254237    Value* outputTargetPtr = iBuilder->CreateGEP(outputBlockBasePtr, iBuilder->CreateSub(phiMatchPos, previousProducedItemCount));
    255 //    iBuilder->CallPrintInt("matchCopyFromValue", matchCopyFromValue);
    256 //    iBuilder->CallPrintInt("phiMatchPos", phiMatchPos);
    257 //    iBuilder->CallPrintInt("aa", iBuilder->CreateSub(phiMatchPos, previousProducedItemCount));
    258     iBuilder->CreateStore(matchCopyFromValue, outputTargetPtr);
     238
     239    Value* matchCopyFromRemain = iBuilder->CreateSub(outputBufferSize, matchCopyFromOffset);
     240    // phiMatchOffset
     241    // phiMatchLength
     242    Value* currentCopySize = iBuilder->CreateUMin(matchCopyFromRemain, phiMatchOffset);
     243    currentCopySize = iBuilder->CreateUMin(currentCopySize, phiMatchLength);
     244    currentCopySize = iBuilder->CreateUMin(currentCopySize, iBuilder->CreateSub(newProducedItemCount, phiMatchPos));
     245
     246    currentCopySize = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(currentCopySize, SIZE_ZERO), SIZE_ONE, currentCopySize); //Workaround for the last byte
     247
     248//    currentCopySize = SIZE_ONE;
     249    iBuilder->CreateMemCpy(outputTargetPtr, matchCopyFromPtr, currentCopySize, 0);
     250
     251//    iBuilder->CallPrintInt("outputTargetPtr", iBuilder->CreateGEP(iBuilder->CreateLoad(outputTargetPtr), iBuilder->CreateSub(currentCopySize, SIZE_ONE)));
     252//    iBuilder->CallPrintInt("matchCopyFromPtr", iBuilder->CreateGEP(iBuilder->CreateLoad(matchCopyFromPtr), iBuilder->CreateSub(currentCopySize, SIZE_ONE)));
    259253
    260254    phiProcessIndex->addIncoming(phiProcessIndex, iBuilder->GetInsertBlock());
    261255    phiMatchOffset->addIncoming(phiMatchOffset, iBuilder->GetInsertBlock());
    262     phiMatchPos->addIncoming(iBuilder->CreateAdd(phiMatchPos, SIZE_ONE), iBuilder->GetInsertBlock());
    263     phiMatchLength->addIncoming(iBuilder->CreateSub(phiMatchLength, SIZE_ONE), iBuilder->GetInsertBlock());
     256    phiMatchPos->addIncoming(iBuilder->CreateAdd(phiMatchPos, currentCopySize), iBuilder->GetInsertBlock());
     257    phiMatchLength->addIncoming(iBuilder->CreateSub(phiMatchLength, currentCopySize), iBuilder->GetInsertBlock());
    264258
    265259    iBuilder->CreateBr(matchCopyLoopCon);
     
    276270}
    277271
    278 
    279 void LZ4MatchCopyKernel::generateStoreCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string outputBufferName,
    280                                                      Value *offset, Type *pointerType, Value *value) {
    281     size_t inputSize = this->getOutputBufferSize(iBuilder, outputBufferName);
    282     Value *offsetMask = iBuilder->getSize(inputSize - 1);
    283     Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
    284 
    285     Value *outputBufferPtr = iBuilder->getRawOutputPointer(outputBufferName, iBuilder->getSize(0));
    286 
    287     outputBufferPtr = iBuilder->CreatePointerCast(outputBufferPtr, pointerType);
    288     iBuilder->CreateStore(value, iBuilder->CreateGEP(outputBufferPtr, maskedOffset));
    289 }
    290 
    291 Value *LZ4MatchCopyKernel::generateLoadCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName,
    292                                                       Value *offset, Type *pointerType) {
    293     size_t inputSize = this->getOutputBufferSize(iBuilder, inputBufferName);
    294     Value *offsetMask = iBuilder->getSize(inputSize - 1);
    295     Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
    296 
    297     Value *inputBufferPtr = iBuilder->getRawOutputPointer(inputBufferName, iBuilder->getSize(0));
    298 
    299     inputBufferPtr = iBuilder->CreatePointerCast(inputBufferPtr, pointerType);
    300     return iBuilder->CreateLoad(iBuilder->CreateGEP(inputBufferPtr, maskedOffset));
    301 }
    302 
    303 Value *LZ4MatchCopyKernel::generateLoadCircularInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName,
    304                                                      Value *offset, Type *pointerType) {
    305     size_t inputSize = this->getInputBufferSize(iBuilder, inputBufferName);
    306     Value *offsetMask = iBuilder->getSize(inputSize - 1);
    307     Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
    308 
    309     Value *inputBufferPtr = iBuilder->getRawInputPointer(inputBufferName, iBuilder->getSize(0));
    310 
    311     inputBufferPtr = iBuilder->CreatePointerCast(inputBufferPtr, pointerType);
    312     return iBuilder->CreateLoad(iBuilder->CreateGEP(inputBufferPtr, maskedOffset));
    313 }
    314 
    315 size_t LZ4MatchCopyKernel::getInputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
    316     return this->getInputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
    317 }
    318 
    319 size_t LZ4MatchCopyKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
    320     return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
    321 }
    322 
    323272LZ4MatchCopyKernel::LZ4MatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
    324273        : MultiBlockKernel("lz4MatchCopyKernel",
     
    332281                           },
    333282        // Outputs
    334                            {Binding{iBuilder->getStreamSetTy(1, 8), OUTPUT_BIT_STREAM_NAME, BoundedRate(0, 1)}},
     283                           {Binding{iBuilder->getStreamSetTy(1, 8), OUTPUT_STREAM_NAME, BoundedRate(0, 1)}},
    335284        // Arguments
    336285                           {},
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_match_copy_kernel.h

    r5895 r5905  
    1515    protected:
    1616        void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value * const numOfStrides) override;
    17 //        void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    1817    private:
    19         llvm::Value* generateLoadCircularInput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputBufferName, llvm::Value* offset, llvm::Type* pointerType);
    20         llvm::Value* generateLoadCircularOutput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputBufferName, llvm::Value* offset, llvm::Type* pointerType);
    21         void generateStoreCircularOutput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string outputBufferName, llvm::Value* offset, llvm::Type* pointerType, llvm::Value* value);
    22 
    23         size_t getInputBufferSize(const std::unique_ptr<KernelBuilder> &iBuilder, std::string bufferName);
    24         size_t getOutputBufferSize(const std::unique_ptr<KernelBuilder> &iBuilder, std::string bufferName);
    2518        void generateOutputCopy(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* outputBlocks);
    2619
Note: See TracChangeset for help on using the changeset viewer.