Changeset 5507


Ignore:
Timestamp:
Jun 13, 2017, 10:37:16 PM (23 months ago)
Author:
cameron
Message:

Update radix64/expand34 to use multiblock kernel

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5506 r5507  
    810810    Type * tempParameterStructType = StructType::create(kb->getContext(), ArrayRef<Type *>(tempBuffers, totalSetCount), "tempBuf");
    811811    Value * tempParameterArea = kb->CreateCacheAlignedAlloca(tempParameterStructType);
    812 
    813812    ConstantInt * blockSize = kb->getSize(kb->getBitBlockWidth());
    814813    ConstantInt * strideSize = kb->getSize(mStride);
     
    10201019
    10211020    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); i++) {
    1022         Value * tempBufPtr = kb->CreateGEP(tempParameterArea, kb->getInt32(i));
     1021        Value * tempBufPtr = kb->CreateGEP(tempParameterArea, {kb->getInt32(0), kb->getInt32(i)});
    10231022        Type * bufPtrType = mStreamSetInputBuffers[i]->getPointerType();
    10241023        tempBufPtr = kb->CreatePointerCast(tempBufPtr, bufPtrType);
     
    10811080    Value * outputBasePos[outputSetCount];
    10821081    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {
    1083         Value * tempBufPtr = kb->CreateGEP(tempParameterArea, kb->getInt32(mStreamSetInputs.size() + i));
     1082        Value * tempBufPtr = kb->CreateGEP(tempParameterArea,  {kb->getInt32(0), kb->getInt32(mStreamSetInputs.size() + i)});
    10841083        Type * bufPtrType = mStreamSetOutputBuffers[i]->getPointerType();
    10851084        tempBufPtr = kb->CreatePointerCast(tempBufPtr, bufPtrType);
     
    11031102    // Copy back data to the actual output buffers.
    11041103    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {
    1105         Value * tempBufPtr = kb->CreateGEP(tempParameterArea, kb->getInt32(mStreamSetInputs.size() + i));
     1104        Value * tempBufPtr = kb->CreateGEP(tempParameterArea,  {kb->getInt32(0), kb->getInt32(mStreamSetInputs.size() + i)});
    11061105        tempBufPtr = kb->CreatePointerCast(tempBufPtr, mStreamSetOutputBuffers[i]->getPointerType());
    11071106        Value * finalOutputItems = kb->getProducedItemCount(mStreamSetOutputs[i].name);
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5440 r5507  
    3939// a continuous buffer for the full segment (number of blocks).
    4040
    41 void expand3_4Kernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     41void expand3_4Kernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & iBuilder) {
    4242
    4343    BasicBlock * expand2_3entry = iBuilder->GetInsertBlock();
     
    5252    BasicBlock * step3store = iBuilder->CreateBasicBlock("step3store");
    5353    BasicBlock * step3store2 = iBuilder->CreateBasicBlock("step3store2");
    54     BasicBlock * itemsDone = iBuilder->CreateBasicBlock("itemsDone");
    5554    BasicBlock * expand3_4_final = iBuilder->CreateBasicBlock("expand3_4_final");
    5655    BasicBlock * expand3_4_exit = iBuilder->CreateBasicBlock("expand3_4_exit");
     
    7776    }
    7877
    79     Constant * tripleBlockSize = iBuilder->getSize(3 * iBuilder->getStride());
     78    Constant * tripleBlockSize = iBuilder->getSize(getKernelStride());
    8079    Constant * packSize = iBuilder->getSize(PACK_SIZE);
    8180    Constant * triplePackSize = iBuilder->getSize(3 * PACK_SIZE); // 3 packs per loop.
     
    8483    const unsigned packAlign = iBuilder->getBitBlockWidth()/8;
    8584
    86     Value * processed = iBuilder->getProcessedItemCount("sourceStream");
    87     Value * available = iBuilder->getAvailableItemCount("sourceStream");
    88     Value * itemsAvail = iBuilder->CreateSub(available, processed);
    89    
    90     //
     85    Function::arg_iterator args = mCurrentMethod->arg_begin();
     86   
     87    /* self = */ args++;
     88    Value * itemsToDo = &*(args++);
     89    Value * sourceStream = &*(args++);
     90    Value * expandedStream = &*(args);
     91    Value * isFinal = iBuilder->CreateICmpULT(itemsToDo, tripleBlockSize);
     92
    9193    // The main loop processes 3 packs of data at a time.  For doFinal
    9294    // processing, process all the remaining sets of 3 packs, otherwise
    9395    // process in multiples of 3 full blocks of data.
    9496    //
    95     Value * loopDivisor = iBuilder->CreateSelect(getIsFinal(), triplePackSize, tripleBlockSize);
    96     Value * excessItems = iBuilder->CreateURem(itemsAvail, loopDivisor);
    97     Value * loopItemsToDo = iBuilder->CreateSub(itemsAvail, excessItems);
    98 
    99     // A block is made up of 8 packs.  Get the pointer to the first pack (changes the type of the pointer only).
    100     Value * sourcePackPtr = iBuilder->getInputStreamPackPtr("sourceStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
    101     Value * outputPackPtr = iBuilder->getOutputStreamPackPtr("expandedStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
     97    Value * excessItems = iBuilder->CreateURem(itemsToDo, triplePackSize);
     98    Value * loopItemsToDo = iBuilder->CreateSub(itemsToDo, excessItems);
     99
     100    Value * sourcePackPtr = iBuilder->CreateBitCast(sourceStream, iBuilder->getBitBlockType()->getPointerTo());
     101    Value * outputPackPtr = iBuilder->CreateBitCast(expandedStream, iBuilder->getBitBlockType()->getPointerTo());
    102102
    103103    Value * hasFullLoop = iBuilder->CreateICmpUGE(loopItemsToDo, triplePackSize);
     
    112112    loopOutput_ptr->addIncoming(outputPackPtr, expand2_3entry);
    113113    loopItemsRemain->addIncoming(loopItemsToDo, expand2_3entry);
     114
    114115
    115116    // Step 1 of the main loop.
     
    154155    loopExitOutput_ptr->addIncoming(loopNextOutputPack, expand_3_4_loop);
    155156
    156     // Update the processed items count based on the loopItemsToDo value.
    157     processed = iBuilder->CreateAdd(processed, loopItemsToDo);
    158     iBuilder->setProcessedItemCount("sourceStream", processed);
    159 
    160 
    161157    // Except for final segment processing, we are done.
    162     iBuilder->CreateCondBr(getIsFinal(), expand3_4_final, expand3_4_exit);
     158    iBuilder->CreateCondBr(isFinal, expand3_4_final, expand3_4_exit);
    163159
    164160    // Final segment processing.   Less than a triplePack remains.
     
    176172    // (g) 9N/4+1 .. 3N - 1 remaining items: do Steps 1, 2 and 3.
    177173    Value * condition_a = iBuilder->CreateICmpEQ(excessItems, ConstantInt::getNullValue(iBuilder->getSizeTy()));
    178     iBuilder->CreateCondBr(condition_a, itemsDone, finalStep1);
     174    iBuilder->CreateCondBr(condition_a, expand3_4_exit, finalStep1);
    179175    // Final Step1 processing
    180176    iBuilder->SetInsertPoint(finalStep1);
     
    183179    iBuilder->CreateAlignedStore(expand0, loopExitOutput_ptr, packAlign);
    184180    Value * condition_b = iBuilder->CreateICmpULE(excessItems, iBuilder->getSize(3 * PACK_SIZE/4));
    185     iBuilder->CreateCondBr(condition_b, itemsDone, finalStep2);
     181    iBuilder->CreateCondBr(condition_b, expand3_4_exit, finalStep2);
    186182    // Final Step 2 processing
    187183    iBuilder->SetInsertPoint(finalStep2);
     
    200196    iBuilder->CreateAlignedStore(expand1, outPack1_ptr, packAlign);
    201197    Value * condition_d = iBuilder->CreateICmpULE(excessItems, iBuilder->getSize(6 * PACK_SIZE/4));
    202     iBuilder->CreateCondBr(condition_d, itemsDone, finalStep3);
     198    iBuilder->CreateCondBr(condition_d, expand3_4_exit, finalStep3);
    203199    // Final Step 3
    204200    iBuilder->SetInsertPoint(finalStep3);
     
    217213    iBuilder->CreateAlignedStore(expand2, outPack2_ptr, packAlign);
    218214    Value * condition_f = iBuilder->CreateICmpULE(excessItems, iBuilder->getSize(9 * PACK_SIZE/4));
    219     iBuilder->CreateCondBr(condition_f, itemsDone, step3store2);
     215    iBuilder->CreateCondBr(condition_f, expand3_4_exit, step3store2);
    220216    iBuilder->SetInsertPoint(step3store2);
    221217    outPack3_ptr = iBuilder->CreateGEP(loopExitOutput_ptr, iBuilder->getInt32(3));
    222218    expand3 = iBuilder->bitCast(iBuilder->CreateShuffleVector(pack2phi, undefPack, expand_3_4_shuffle[3]));
    223219    iBuilder->CreateAlignedStore(expand3, outPack3_ptr, packAlign);
    224     iBuilder->CreateBr(itemsDone);
     220    iBuilder->CreateBr(expand3_4_exit);
    225221    //
    226     iBuilder->SetInsertPoint(itemsDone);
    227     processed = iBuilder->CreateAdd(processed, excessItems);
    228     iBuilder->setProcessedItemCount("sourceStream", processed);
    229 
    230    
    231     iBuilder->CreateBr(expand3_4_exit);
    232222    iBuilder->SetInsertPoint(expand3_4_exit);
    233 }
     223    }
    234224
    235225
     
    392382
    393383expand3_4Kernel::expand3_4Kernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
    394 : SegmentOrientedKernel("expand3_4",
     384: MultiBlockKernel("expand3_4",
    395385            {Binding{iBuilder->getStreamSetTy(1, 8), "sourceStream"}},
    396             {Binding{iBuilder->getStreamSetTy(1, 8), "expandedStream", FixedRatio(4,3)}},
     386            {Binding{iBuilder->getStreamSetTy(1, 8), "expand34Stream", FixedRatio(4,3)}},
    397387            {}, {}, {}) {
     388    setKernelStride(3 * iBuilder->getBitBlockWidth());
    398389}
    399390
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r5464 r5507  
    1919    This is a useful preparatory transformation in various radix-64 encodings. */
    2020 
    21 class expand3_4Kernel final : public SegmentOrientedKernel {
     21class expand3_4Kernel final : public MultiBlockKernel {
    2222public:   
    2323    expand3_4Kernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     
    2525    bool hasSignature() const override { return false; }
    2626private:
    27     void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) override;
     27    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder) override;
    2828};
    2929
Note: See TracChangeset for help on using the changeset viewer.