Changeset 5635


Ignore:
Timestamp:
Sep 13, 2017, 11:50:27 AM (9 days ago)
Author:
cameron
Message:

PDEP kernel: only process blocks if sufficient source stream data available

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5630 r5635  
    10401040    }
    10411041
    1042     Value * nowProcessed = kb->CreateAdd(processedItemCount[0], linearlyAvailItems);
    1043     kb->setProcessedItemCount(mStreamSetInputs[0].name, nowProcessed);
    10441042    Value * reducedStridesToDo = kb->CreateSub(stridesRemaining, linearlyWritableStrides);
    10451043    stridesRemaining->addIncoming(reducedStridesToDo, kb->GetInsertBlock());
    1046     kb->CreateBr(doSegmentOuterLoop);
    1047 
     1044    if (mIsDerived[0]) {
     1045        Value * nowProcessed = kb->CreateAdd(processedItemCount[0], linearlyAvailItems);
     1046        kb->setProcessedItemCount(mStreamSetInputs[0].name, nowProcessed);
     1047        kb->CreateBr(doSegmentOuterLoop);
     1048    }
     1049    else {
     1050        Value * nowProcessed = kb->getProcessedItemCount(mStreamSetInputs[0].name);
     1051        Value * allAvailableItemsProcessed = kb->CreateICmpUGE(kb->CreateSub(nowProcessed, processedItemCount[0]), linearlyAvailItems);
     1052        kb->CreateCondBr(allAvailableItemsProcessed, doSegmentOuterLoop, segmentDone);
     1053    }
    10481054
    10491055    //
     
    11721178    //  Update the processed item count (and hence all the counts derived automatically
    11731179    //  therefrom).
    1174     kb->setProcessedItemCount(mStreamSetInputs[0].name, finalItemCountNeeded[0]);
     1180    if (mIsDerived[0]) {
     1181        kb->setProcessedItemCount(mStreamSetInputs[0].name, finalItemCountNeeded[0]);
     1182    }
    11751183   
    11761184    // Copy back data to the actual output buffers.
     
    12071215    //  actual buffers.  If this isn't the final block, loop back for more multiblock processing.
    12081216    //
    1209     stridesRemaining->addIncoming(kb->CreateSub(stridesRemaining, kb->CreateZExt(haveStrides, kb->getSizeTy())), kb->GetInsertBlock());
    12101217    BasicBlock * setTermination = kb->CreateBasicBlock("mBsetTermination");
    1211     kb->CreateCondBr(haveStrides, doSegmentOuterLoop, setTermination);
     1218    if (mIsDerived[0]) {
     1219        stridesRemaining->addIncoming(kb->CreateSub(stridesRemaining, kb->CreateZExt(haveStrides, kb->getSizeTy())), kb->GetInsertBlock());
     1220        kb->CreateCondBr(haveStrides, doSegmentOuterLoop, setTermination);
     1221    }
     1222    else {
     1223        Value * nowProcessed = kb->getProcessedItemCount(mStreamSetInputs[0].name);
     1224        Value * allAvailableItemsProcessed = kb->CreateICmpEQ(kb->CreateSub(nowProcessed, processedItemCount[0]), finalItemCountNeeded[0]);
     1225        BasicBlock * checkTermination = kb->CreateBasicBlock("checkTermination");
     1226        kb->CreateCondBr(allAvailableItemsProcessed, checkTermination, segmentDone);
     1227        kb->SetInsertPoint(checkTermination);
     1228        stridesRemaining->addIncoming(kb->CreateSub(stridesRemaining, kb->CreateZExt(haveStrides, kb->getSizeTy())), kb->GetInsertBlock());
     1229        kb->CreateCondBr(haveStrides, doSegmentOuterLoop, setTermination);
     1230    }   
    12121231    kb->SetInsertPoint(setTermination);
    12131232    kb->setTerminationSignal();
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r5627 r5635  
    1414PDEPkernel::PDEPkernel(const std::unique_ptr<kernel::KernelBuilder> & kb, unsigned streamCount, unsigned swizzleFactor, unsigned PDEP_width)
    1515: MultiBlockKernel("PDEPdel",
    16                   {Binding{kb->getStreamSetTy(), "PDEPmarkerStream"}, Binding{kb->getStreamSetTy(streamCount), "sourceStreamSet", MaxRatio(1)}},
     16                  {Binding{kb->getStreamSetTy(), "PDEPmarkerStream", MaxRatio(1)}, Binding{kb->getStreamSetTy(streamCount), "sourceStreamSet", MaxRatio(1)}},
    1717                  {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"}},
    1818                  {}, {}, {})
     
    2727    BasicBlock * entry = kb->GetInsertBlock();
    2828    BasicBlock * checkLoopCond = kb->CreateBasicBlock("checkLoopCond");
     29    BasicBlock * checkSourceCount = kb->CreateBasicBlock("checkSourceCount");
    2930    BasicBlock * processBlock = kb->CreateBasicBlock("processBlock");
    3031    BasicBlock * terminate = kb->CreateBasicBlock("terminate");
     
    3435    Value * itemsToDo = &*(args++); // Since PDEP marker stream is a bit stream, this is the number of PDEP marker bits to process
    3536    // Get pointer to start of the StreamSetBlock containing unprocessed input items.
    36     args++; //sourceItemsAvail
     37    Value * sourceItemsAvail = args++;
    3738    Value * PDEPStrmPtr = &*(args++);
    3839    Value * inputSwizzlesPtr = &*(args++);
     
    6162    PHINode * blockOffsetPhi = kb->CreatePHI(kb->getSizeTy(), 2); // block offset from the base block, e.g. 0, 1, 2, ...
    6263    PHINode * updatedProcessedBitsPhi = kb->CreatePHI(kb->getSizeTy(), 2);
     64    PHINode * sourceItemsRemaining = kb->CreatePHI(kb->getSizeTy(), 2);
    6365    blocksToDoPhi->addIncoming(blocksToDo, entry);
    6466    blockOffsetPhi->addIncoming(kb->getSize(0), entry);
    6567    updatedProcessedBitsPhi->addIncoming(processedSourceBits, entry);
     68    sourceItemsRemaining->addIncoming(sourceItemsAvail, entry);
    6669
    6770    Value * haveRemBlocks = kb->CreateICmpUGT(blocksToDoPhi, kb->getSize(0));
    68     kb->CreateCondBr(haveRemBlocks, processBlock, terminate);
     71    kb->CreateCondBr(haveRemBlocks, checkSourceCount, terminate);
    6972
    70     kb->SetInsertPoint(processBlock);
     73    kb->SetInsertPoint(checkSourceCount);
    7174    // Extract the values we will use in the main processing loop
    7275    Value * updatedProcessedBits = updatedProcessedBitsPhi;
     76    Value * updatedSourceItems = sourceItemsRemaining;
    7377    Value * PDEP_ms_blk = kb->CreateBlockAlignedLoad(kb->CreateGEP(PDEPStrmPtr, {blockOffsetPhi, kb->getInt32(0)}));
    7478    kb->CallPrintRegister("PDEP_ms_blk", PDEP_ms_blk);
     
    7680    const auto PDEP_masks = get_PDEP_masks(kb, PDEP_ms_blk, mPDEPWidth);   
    7781    const auto mask_popcounts = get_block_popcounts(kb, PDEP_ms_blk, mPDEPWidth);
     82   
     83    Value * total_count = mask_popcounts[0];
     84    for (unsigned j = 1; j < mask_popcounts.size(); j++) {
     85        total_count = kb->CreateAdd(mask_popcounts[j]);
     86    }
     87    kb->CreateCondBr(kb->CreateUGE(total_count, sourceItemsRemaining), processBlock, terminate);
     88    kb->SetInsertPoint(processBlock);
    7889
    7990    // For each mask extracted from the PDEP marker block
     
    114125                                    kb->CallPrintRegister("result_swizzle", result_swizzle);
    115126        updatedProcessedBits = kb->CreateAdd(updatedProcessedBits, mask_popcounts[i]);
     127        updatedSourceItems = kb->CreateSub(updatedSourceItems, mask_popcounts[i]);
    116128    }
    117129
     
    119131    blocksToDoPhi->addIncoming(kb->CreateSub(blocksToDoPhi, kb->getSize(1)), processBlock);
    120132    blockOffsetPhi->addIncoming(kb->CreateAdd(blockOffsetPhi, kb->getSize(1)), processBlock);
     133    sourceItemsRemaining->addIncoming(updatedSourceItems, processBlock);
    121134    kb->CreateBr(checkLoopCond);
    122135
    123136    kb->SetInsertPoint(terminate);
     137   
     138    kb->setProcessedItemCount("PDEPmarkerStream", updatedProcessedBitsPhi);   
    124139    kb->setProcessedItemCount("sourceStreamSet", updatedProcessedBitsPhi);   
    125140}
Note: See TracChangeset for help on using the changeset viewer.