Changeset 5966 for icGREP


Ignore:
Timestamp:
Apr 11, 2018, 2:35:19 PM (12 months ago)
Author:
xwa163
Message:

Convert lz4_swizzled_match_copy_kernel to SegmentOrientedKernel

Location:
icGREP/icgrep-devel/icgrep/kernels/lz4
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp

    r5950 r5966  
    1515Value* LZ4SwizzledMatchCopyKernel::loadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string bufferName, Value* offset) {
    1616    // GEP here is safe
    17     Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    18     Value* inputLocalBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
    19     Value* inputLocalBlockOffset = iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH);
    20 
    21     Value* blockBasePtr = iBuilder->getInputStreamBlockPtr(bufferName, iBuilder->getSize(0), inputLocalBlockIndex);
    22     blockBasePtr = iBuilder->CreatePointerCast(blockBasePtr, iBuilder->getInt64Ty()->getPointerTo());
    23     // GEP here is safe
    24 
    2517    Constant* SIZE_ZERO = iBuilder->getSize(0);
    2618    Type* int64PtrType = iBuilder->getInt64Ty()->getPointerTo();
     
    3022    Value* ptr2 = iBuilder->CreateGEP(outputRawPtr, tmpOffset);
    3123
    32 
    3324    return iBuilder->CreateLoad(ptr2);
    3425}
    3526
    36 void LZ4SwizzledMatchCopyKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value * const numOfStrides)  {
     27void LZ4SwizzledMatchCopyKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     28//void LZ4SwizzledMatchCopyKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value * const numOfStrides)  {
    3729    // Const
    3830    Constant *SIZE_ZERO = iBuilder->getSize(0);
     
    4335    BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
    4436
    45     Value *itemsToDo = mAvailableItemCount[3];
    46     Value *processedItemCount = mInitialProcessedItemCount[3];
    47     Value *totalItemCount = iBuilder->CreateAdd(itemsToDo, processedItemCount);
     37    Value *totalItemCount = iBuilder->getAvailableItemCount("sourceStreamSet0");
     38    Value *itemsToDo = iBuilder->CreateSub(totalItemCount, iBuilder->getProcessedItemCount("sourceStreamSet0"));
     39
    4840    Value *isFinalBlock = iBuilder->CreateICmpULT(itemsToDo, iBuilder->getSize(4 * 1024 * 1024));
    4941    this->mIsFinalBlock = isFinalBlock;
     
    7870
    7971    Value *initM0StartProcessIndex = iBuilder->getProcessedItemCount("m0Start");
    80     Value *totalM0StartItemsCount = iBuilder->CreateAdd(initM0StartProcessIndex, mAvailableItemCount[0]);
     72    Value *totalM0StartItemsCount = iBuilder->getAvailableItemCount("m0Start");
    8173
    8274    Value *initMatchOffset = iBuilder->getScalarField("pendingMatchOffset");
     
    157149
    158150
    159 
    160     Value* matchCopyTargetBlockIndex = iBuilder->CreateUDiv(iBuilder->CreateSub(phiMatchPos, previousProducedItemCount), SIZE_PDEP_WIDTH);
    161     Value* matchCopyTargetBlockOffset = iBuilder->CreateURem(phiMatchPos, SIZE_PDEP_WIDTH);
     151    Value* matchCopyTargetOffset = iBuilder->CreateURem(phiMatchPos, outputBufferSize);
     152    Value* matchCopyTargetBlockIndex = iBuilder->CreateUDiv(matchCopyTargetOffset, SIZE_PDEP_WIDTH);
     153    Value* matchCopyTargetBlockOffset = iBuilder->CreateURem(matchCopyTargetOffset, SIZE_PDEP_WIDTH);
    162154
    163155
     
    189181        Value* copiedValue = iBuilder->simd_and(fromBlockValue, fullMask);
    190182
    191         Value* outputBlockBasePtr = iBuilder->CreatePointerCast(iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), SIZE_ZERO), iBuilder->getBitBlockType()->getPointerTo());
    192 
    193         Value* outputTargetBlockPtr = iBuilder->CreateGEP(outputBlockBasePtr, matchCopyTargetBlockIndex);
     183        Value* outputTargetBlockPtr = iBuilder->CreateGEP(rawOutputBasePtr, matchCopyTargetBlockIndex);
     184
    194185//        iBuilder->CallPrintInt("outputTargetBlockPtr", outputTargetBlockPtr);
    195186        Value* targetOriginalValue = iBuilder->CreateLoad(outputTargetBlockPtr);
     
    237228    Value *SIZE_ZERO = iBuilder->getSize(0);
    238229    Value *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
     230    Constant *INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(iBuilder->getBitBlockWidth());
    239231    Type* bytePtrType = iBuilder->getInt8PtrTy();
    240232
    241233    Value *previousProcessed = iBuilder->getProcessedItemCount("sourceStreamSet0");
    242234
    243 
    244     Value *itemsToDo = mAvailableItemCount[3];
     235    Value *itemsToDo = iBuilder->CreateSub(iBuilder->getAvailableItemCount("sourceStreamSet0"), iBuilder->getProcessedItemCount("sourceStreamSet0"));
    245236    Value *copySize = iBuilder->CreateMul(outputBlocks, SIZE_BIT_BLOCK_WIDTH);
    246237    Value* actualCopySize = iBuilder->CreateUMin(itemsToDo, copySize);
    247238    Value* copyByte = iBuilder->CreateUDivCeil(iBuilder->CreateMul(copySize, iBuilder->getSize(mStreamCount)), iBuilder->getSize(8)); // i8
    248239
     240    Value* outputBufferSize = iBuilder->getSize(this->getAnyStreamSetBuffer("sourceStreamSet0")->getBufferBlocks() * iBuilder->getBitBlockWidth());
     241    Value* inputOffset = iBuilder->CreateMul(
     242            iBuilder->CreateAnd(iBuilder->CreateURem(previousProcessed, outputBufferSize), ConstantExpr::getNeg(INT64_BIT_BLOCK_WIDTH)), iBuilder->getInt64(mStreamCount)
     243    );
    249244
    250245    for (int i = 0; i < mStreamSize; i++) {
    251         Value *inputBasePtr = iBuilder->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(i), SIZE_ZERO);
    252         Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), SIZE_ZERO);
     246
     247        Value * inputBasePtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer("sourceStreamSet" + std::to_string(i), inputOffset), iBuilder->getBitBlockType()->getPointerTo());
     248        Value * outputBasePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("outputStreamSet" + std::to_string(i), inputOffset), iBuilder->getBitBlockType()->getPointerTo());
     249
    253250        iBuilder->CreateMemCpy(
    254251                iBuilder->CreatePointerCast(outputBasePtr, bytePtrType),
     
    264261}
    265262
    266 Value* LZ4SwizzledMatchCopyKernel::getMaximumMatchCopyBlock(const std::unique_ptr<KernelBuilder> &iBuilder) {
    267     Value *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    268     Value *SIZE_ZERO = iBuilder->getSize(0);
    269     Value *SIZE_ONE = iBuilder->getSize(1);
    270     Value *m0EndInitOffset = iBuilder->CreateURem(iBuilder->getProcessedItemCount("m0End"), SIZE_BIT_BLOCK_WIDTH);
    271     Value *m0EndItemsToDo = mAvailableItemCount[1];
    272     Value *m0EndBasePtr = iBuilder->getInputStreamBlockPtr("m0End", SIZE_ZERO);
    273     m0EndBasePtr = iBuilder->CreatePointerCast(m0EndBasePtr, iBuilder->getInt64Ty()->getPointerTo());
    274     Value *lastM0 = iBuilder->CreateLoad(
    275             iBuilder->CreateGEP(
    276                     m0EndBasePtr,
    277                     iBuilder->CreateSub(
    278                             iBuilder->CreateAdd(m0EndInitOffset, m0EndItemsToDo),
    279                             SIZE_ONE
    280                     )
    281 
    282             )
    283     );
    284     Value *lastDepositPosition = iBuilder->CreateAdd(lastM0, SIZE_ONE);
    285 
    286     Value *currentMaxBlock = iBuilder->CreateSelect(
    287             this->mIsFinalBlock,
    288             iBuilder->CreateUDivCeil(lastDepositPosition, SIZE_BIT_BLOCK_WIDTH),
    289             iBuilder->CreateUDiv(lastDepositPosition, SIZE_BIT_BLOCK_WIDTH)
    290     );
    291 
    292     // Produced Item Count will always be full bitblock except for final block
    293     Value *previousProducedBlocks = iBuilder->CreateUDiv(
    294             iBuilder->getProducedItemCount("outputStreamSet0"),
    295             SIZE_BIT_BLOCK_WIDTH
    296     );
    297 
    298     // (m0 + 1) / BitBlockWidth - produceItemCount / BitBlockWidth
    299     return iBuilder->CreateSub(currentMaxBlock, previousProducedBlocks);
    300 }
    301 
    302263LZ4SwizzledMatchCopyKernel::LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned streamCount/*=4*/, unsigned streamSize/*=2*/, unsigned swizzleFactor/*=4*/, unsigned PDEP_width/*64*/)
    303         : MultiBlockKernel("LZ4SwizzledMatchCopyKernel",
     264        : SegmentOrientedKernel("LZ4SwizzledMatchCopyKernel",
    304265        // Inputs
    305266                           {
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.h

    r5941 r5966  
    1212
    1313namespace kernel {
    14     class LZ4SwizzledMatchCopyKernel: public MultiBlockKernel {
     14    class LZ4SwizzledMatchCopyKernel final: public SegmentOrientedKernel {
    1515    public:
    1616        LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned streamCount, unsigned streamSize, unsigned swizzleFactor, unsigned PDEP_width = 64);
    1717    protected:
    18         void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value * const numOfStrides) override;
     18//        void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value * const numOfStrides) override;
     19        void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) override;
    1920    private:
    2021
     
    2829        void generateOutputCopy(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* outputBlocks);
    2930
    30         llvm::Value* getMaximumMatchCopyBlock(const std::unique_ptr<KernelBuilder> &iBuilder);
    3131        llvm::Value* mIsFinalBlock;
    3232        llvm::Value* loadInt64NumberInput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string bufferName, llvm::Value* offset);
Note: See TracChangeset for help on using the changeset viewer.