Ignore:
Timestamp:
Sep 13, 2017, 11:50:27 AM (20 months ago)
Author:
cameron
Message:

PDEP kernel: only process a block if sufficient source stream data is available

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r5627 r5635  
    1414PDEPkernel::PDEPkernel(const std::unique_ptr<kernel::KernelBuilder> & kb, unsigned streamCount, unsigned swizzleFactor, unsigned PDEP_width)
    1515: MultiBlockKernel("PDEPdel",
    16                   {Binding{kb->getStreamSetTy(), "PDEPmarkerStream"}, Binding{kb->getStreamSetTy(streamCount), "sourceStreamSet", MaxRatio(1)}},
     16                  {Binding{kb->getStreamSetTy(), "PDEPmarkerStream", MaxRatio(1)}, Binding{kb->getStreamSetTy(streamCount), "sourceStreamSet", MaxRatio(1)}},
    1717                  {Binding{kb->getStreamSetTy(streamCount), "outputStreamSet"}},
    1818                  {}, {}, {})
     
    2727    BasicBlock * entry = kb->GetInsertBlock();
    2828    BasicBlock * checkLoopCond = kb->CreateBasicBlock("checkLoopCond");
     29    BasicBlock * checkSourceCount = kb->CreateBasicBlock("checkSourceCount");
    2930    BasicBlock * processBlock = kb->CreateBasicBlock("processBlock");
    3031    BasicBlock * terminate = kb->CreateBasicBlock("terminate");
     
    3435    Value * itemsToDo = &*(args++); // Since PDEP marker stream is a bit stream, this is the number of PDEP marker bits to process
    3536    // Get pointer to start of the StreamSetBlock containing unprocessed input items.
    36     args++; //sourceItemsAvail
     37    Value * sourceItemsAvail = args++;
    3738    Value * PDEPStrmPtr = &*(args++);
    3839    Value * inputSwizzlesPtr = &*(args++);
     
    6162    PHINode * blockOffsetPhi = kb->CreatePHI(kb->getSizeTy(), 2); // block offset from the base block, e.g. 0, 1, 2, ...
    6263    PHINode * updatedProcessedBitsPhi = kb->CreatePHI(kb->getSizeTy(), 2);
     64    PHINode * sourceItemsRemaining = kb->CreatePHI(kb->getSizeTy(), 2);
    6365    blocksToDoPhi->addIncoming(blocksToDo, entry);
    6466    blockOffsetPhi->addIncoming(kb->getSize(0), entry);
    6567    updatedProcessedBitsPhi->addIncoming(processedSourceBits, entry);
     68    sourceItemsRemaining->addIncoming(sourceItemsAvail, entry);
    6669
    6770    Value * haveRemBlocks = kb->CreateICmpUGT(blocksToDoPhi, kb->getSize(0));
    68     kb->CreateCondBr(haveRemBlocks, processBlock, terminate);
     71    kb->CreateCondBr(haveRemBlocks, checkSourceCount, terminate);
    6972
    70     kb->SetInsertPoint(processBlock);
     73    kb->SetInsertPoint(checkSourceCount);
    7174    // Extract the values we will use in the main processing loop
    7275    Value * updatedProcessedBits = updatedProcessedBitsPhi;
     76    Value * updatedSourceItems = sourceItemsRemaining;
    7377    Value * PDEP_ms_blk = kb->CreateBlockAlignedLoad(kb->CreateGEP(PDEPStrmPtr, {blockOffsetPhi, kb->getInt32(0)}));
    7478    kb->CallPrintRegister("PDEP_ms_blk", PDEP_ms_blk);
     
    7680    const auto PDEP_masks = get_PDEP_masks(kb, PDEP_ms_blk, mPDEPWidth);   
    7781    const auto mask_popcounts = get_block_popcounts(kb, PDEP_ms_blk, mPDEPWidth);
     82   
     83    Value * total_count = mask_popcounts[0];
     84    for (unsigned j = 1; j < mask_popcounts.size(); j++) {
     85        total_count = kb->CreateAdd(mask_popcounts[j]);
     86    }
     87    kb->CreateCondBr(kb->CreateUGE(total_count, sourceItemsRemaining), processBlock, terminate);
     88    kb->SetInsertPoint(processBlock);
    7889
    7990    // For each mask extracted from the PDEP marker block
     
    114125                                    kb->CallPrintRegister("result_swizzle", result_swizzle);
    115126        updatedProcessedBits = kb->CreateAdd(updatedProcessedBits, mask_popcounts[i]);
     127        updatedSourceItems = kb->CreateSub(updatedSourceItems, mask_popcounts[i]);
    116128    }
    117129
     
    119131    blocksToDoPhi->addIncoming(kb->CreateSub(blocksToDoPhi, kb->getSize(1)), processBlock);
    120132    blockOffsetPhi->addIncoming(kb->CreateAdd(blockOffsetPhi, kb->getSize(1)), processBlock);
     133    sourceItemsRemaining->addIncoming(updatedSourceItems, processBlock);
    121134    kb->CreateBr(checkLoopCond);
    122135
    123136    kb->SetInsertPoint(terminate);
     137   
     138    kb->setProcessedItemCount("PDEPmarkerStream", updatedProcessedBitsPhi);   
    124139    kb->setProcessedItemCount("sourceStreamSet", updatedProcessedBitsPhi);   
    125140}
Note: See TracChangeset for help on using the changeset viewer.