Ignore:
Timestamp:
Oct 15, 2016, 11:22:19 PM (3 years ago)
Author:
cameron
Message:

Restructuring pipeline control to use termination signals

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5193 r5194  
    164164    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
    165165    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    166     BasicBlock * blockLoopCond = BasicBlock::Create(iBuilder->getContext(), "blockLoopCond", doSegmentFunction, 0);
    167     BasicBlock * blockLoopBody = BasicBlock::Create(iBuilder->getContext(), "blockLoopBody", doSegmentFunction, 0);
    168     BasicBlock * blocksDone = BasicBlock::Create(iBuilder->getContext(), "blocksDone", doSegmentFunction, 0);
    169     BasicBlock * checkFinalBlock = BasicBlock::Create(iBuilder->getContext(), "checkFinalBlock", doSegmentFunction, 0);
     166    BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), "strideLoopCond", doSegmentFunction, 0);
     167    BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), "strideLoopBody", doSegmentFunction, 0);
     168    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", doSegmentFunction, 0);
     169    BasicBlock * checkFinalStride = BasicBlock::Create(iBuilder->getContext(), "checkFinalStride", doSegmentFunction, 0);
     170    BasicBlock * checkEndSignals = BasicBlock::Create(iBuilder->getContext(), "checkEndSignals", doSegmentFunction, 0);
    170171    BasicBlock * callFinalBlock = BasicBlock::Create(iBuilder->getContext(), "callFinalBlock", doSegmentFunction, 0);
    171172    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0);
     173    BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), "finalExit", doSegmentFunction, 0);
    172174    Type * const size_ty = iBuilder->getSizeTy();
    173175    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
     
    193195    Value * availablePos = producerPos[0];
    194196    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
    195        
    196197        LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
    197198        producerPos.push_back(p);
     
    204205    iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail);
    205206#endif
    206     Value * blocksAvail = iBuilder->CreateUDiv(itemsAvail, stride);
     207    Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
     208    Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
    207209    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
    208     Value * lessThanFullSegment = iBuilder->CreateICmpULT(blocksAvail, blocksToDo);
    209     blocksToDo = iBuilder->CreateSelect(lessThanFullSegment, blocksAvail, blocksToDo);
    210     //iBuilder->CallPrintInt(mKernelName + "_blocksAvail", blocksAvail);
    211     iBuilder->CreateBr(blockLoopCond);
    212 
    213     iBuilder->SetInsertPoint(blockLoopCond);
    214     PHINode * blocksRemaining = iBuilder->CreatePHI(size_ty, 2, "blocksRemaining");
    215     blocksRemaining->addIncoming(blocksToDo, entryBlock);
    216     Value * notDone = iBuilder->CreateICmpUGT(blocksRemaining, ConstantInt::get(size_ty, 0));
    217     iBuilder->CreateCondBr(notDone, blockLoopBody, blocksDone);
    218 
    219     iBuilder->SetInsertPoint(blockLoopBody);
     210    Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo);
     211    stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo);
     212    //iBuilder->CallPrintInt(mKernelName + "_stridesAvail", stridesAvail);
     213    iBuilder->CreateBr(strideLoopCond);
     214
     215    iBuilder->SetInsertPoint(strideLoopCond);
     216    PHINode * stridesRemaining = iBuilder->CreatePHI(size_ty, 2, "stridesRemaining");
     217    stridesRemaining->addIncoming(stridesToDo, entryBlock);
     218    Value * notDone = iBuilder->CreateICmpUGT(stridesRemaining, ConstantInt::get(size_ty, 0));
     219    iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
     220
     221    iBuilder->SetInsertPoint(strideLoopBody);
    220222    Value * blockNo = getScalarField(self, blockNoScalar);   
    221223
    222224    generateDoBlockLogic(self, blockNo);
    223225    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
    224     blocksRemaining->addIncoming(iBuilder->CreateSub(blocksRemaining, ConstantInt::get(size_ty, 1)), blockLoopBody);
    225     iBuilder->CreateBr(blockLoopCond);
    226    
    227     iBuilder->SetInsertPoint(blocksDone);
    228     processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(blocksToDo, stride));
     226    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, ConstantInt::get(size_ty, 1)), strideLoopBody);
     227    iBuilder->CreateBr(strideLoopCond);
     228   
     229    iBuilder->SetInsertPoint(stridesDone);
     230    processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(stridesToDo, stride));
    229231    setProcessedItemCount(self, processed);
    230     iBuilder->CreateCondBr(lessThanFullSegment, checkFinalBlock, segmentDone);
    231    
    232     iBuilder->SetInsertPoint(checkFinalBlock);
     232    iBuilder->CreateCondBr(lessThanFullSegment, checkFinalStride, segmentDone);
     233   
     234    iBuilder->SetInsertPoint(checkFinalStride);
    233235   
    234236    /* We had less than a full segment of data; we may have reached the end of input
    235237       on one of the stream sets.  */
    236238   
     239    Value * alreadyDone = getTerminationSignal(self);
     240    iBuilder->CreateCondBr(alreadyDone, finalExit, checkEndSignals);
     241   
     242    iBuilder->SetInsertPoint(checkEndSignals);
    237243    Value * endOfInput = iBuilder->CreateLoad(endSignalPtrs[0]);
    238244    if (endSignalPtrs.size() > 1) {
     
    249255    iBuilder->SetInsertPoint(callFinalBlock);
    250256   
    251     Value * remainingItems = iBuilder->CreateURem(availablePos, stride);
     257    Value * remainingItems = iBuilder->CreateSub(availablePos, processed);
    252258    createFinalBlockCall(self, remainingItems);
    253259    setProcessedItemCount(self, availablePos);
     
    257263        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
    258264    }
    259    
     265    setTerminationSignal(self);
    260266    iBuilder->CreateBr(segmentDone);
    261267   
     
    273279    // Must be the last action, for synchronization.
    274280    setLogicalSegmentNo(self, iBuilder->CreateAdd(segmentNo, ConstantInt::get(size_ty, 1)));
     281    iBuilder->CreateBr(finalExit);
     282   
     283    iBuilder->SetInsertPoint(finalExit);
    275284
    276285    iBuilder->CreateRetVoid();
     
    312321}
    313322
    314 //  By default, kernels do not terminate early. 
    315323Value * KernelBuilder::getTerminationSignal(Value * self) {
    316     return ConstantInt::getNullValue(iBuilder->getInt1Ty());
     324    return getScalarField(self, terminationSignal);
    317325}
    318326
     
    333341}
    334342
    335 void KernelBuilder::setTerminationSignal(Value * self, Value * newFieldVal) {
    336     llvm::report_fatal_error("This kernel type does not support setTerminationSignal.");
    337 }
     343void KernelBuilder::setTerminationSignal(Value * self) {
     344    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(terminationSignal)});
     345    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), ptr);
     346}
     347                                     
    338348
    339349
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5185 r5194  
    5454    virtual llvm::Value * getProcessedItemCount(llvm::Value * kernelInstance) override;
    5555    virtual llvm::Value * getProducedItemCount(llvm::Value * kernelInstance) override;
    56     virtual llvm::Value * getTerminationSignal(llvm::Value * kernelInstance) override;
     56    llvm::Value * getTerminationSignal(llvm::Value * kernelInstance);
    5757   
    5858   
     
    113113
    114114    llvm::Value * getStreamSetBlockPtr(Value * self, std::string ssName, Value * blockNo);
    115 
     115   
    116116    void setBlockNo(Value * self, Value * newFieldVal);
    117117    virtual void setLogicalSegmentNo(llvm::Value * self, Value * newFieldVal);
    118118    virtual void setProcessedItemCount(llvm::Value * self, Value * newFieldVal);
    119119    virtual void setProducedItemCount(llvm::Value * self, Value * newFieldVal);
    120     virtual void setTerminationSignal(llvm::Value * self, Value * newFieldVal);
     120    void setTerminationSignal(llvm::Value * self);
    121121   
    122    
     122
    123123protected:
    124124
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5175 r5194  
    3838     // Create the basic blocks for the thread function.
    3939    BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc, 0);
    40     BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentCond", threadFunc, 0);
    41     BasicBlock * finalSegmentLoopExit = BasicBlock::Create(iBuilder->getContext(), "partialSegmentCond", threadFunc, 0);
     40    BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", threadFunc, 0);
    4241    BasicBlock * exitThreadBlock = BasicBlock::Create(iBuilder->getContext(), "exitThread", threadFunc, 0);
    4342    std::vector<BasicBlock *> segmentWait;
    4443    std::vector<BasicBlock *> segmentLoopBody;
    45     std::vector<BasicBlock *> partialSegmentWait;
    46     std::vector<BasicBlock *> partialSegmentLoopBody;
    4744    for (unsigned i = 0; i < kernels.size(); i++) {
    4845        segmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), "segmentWait"+std::to_string(i), threadFunc, 0));
    4946        segmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), "segmentWait"+std::to_string(i), threadFunc, 0));
    50         partialSegmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), "partialSegmentWait"+std::to_string(i), threadFunc, 0));
    51         partialSegmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), "partialSegmentLoopBody"+std::to_string(i), threadFunc, 0));
    5247    }
    5348
    5449    iBuilder->SetInsertPoint(entryBlock);
    5550    Value * sharedStruct = iBuilder->CreateBitCast(input, PointerType::get(sharedStructType, 0));
    56     Value * myThreadId = ConstantInt::get(size_ty, id);
    57     Value * fileSize = iBuilder->CreateLoad(iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
     51    Constant * myThreadId = ConstantInt::get(size_ty, id);
    5852    std::vector<Value *> instancePtrs;
    5953    for (unsigned i = 0; i < kernels.size(); i++) {
     
    6559    int segmentSize = codegen::SegmentSize;
    6660    Constant * segmentBlocks = ConstantInt::get(size_ty, segmentSize);
    67     Constant * segmentBytes = ConstantInt::get(size_ty, iBuilder->getStride() * segmentSize);
    68     Constant * hypersegmentBytes = ConstantInt::get(size_ty, iBuilder->getStride() * segmentSize * threadNum);
    69     Constant * const blockSize = ConstantInt::get(size_ty, iBuilder->getStride());
    70 
    71     Value * myFirstSegNo = myThreadId;  //
    72     // The offset of my starting segment within the thread group hypersegment.
    73     Value * myOffset = iBuilder->CreateMul(segmentBytes, myThreadId);
    74     Value * fullSegLimit = iBuilder->CreateAdd(myOffset, segmentBytes);
    75 
    7661    iBuilder->CreateBr(segmentLoop);
    7762
    7863    iBuilder->SetInsertPoint(segmentLoop);
    79     PHINode * remainingBytes = iBuilder->CreatePHI(size_ty, 2, "remainingBytes");
    80     remainingBytes->addIncoming(fileSize, entryBlock);
    8164    PHINode * segNo = iBuilder->CreatePHI(size_ty, 2, "segNo");
    82     segNo->addIncoming(myFirstSegNo, entryBlock);
    83 
    84     Value * LT_fullSegment = iBuilder->CreateICmpSLT(remainingBytes, fullSegLimit);
    85     iBuilder->CreateCondBr(LT_fullSegment, finalSegmentLoopExit, segmentWait[0]);
     65    segNo->addIncoming(myThreadId, entryBlock);
     66    unsigned last_kernel = kernels.size() - 1;
     67    Value * alreadyDone = kernels[last_kernel]->getTerminationSignal(instancePtrs[last_kernel]);
     68    iBuilder->CreateCondBr(alreadyDone, exitThreadBlock, segmentWait[0]);
    8669
    8770    for (unsigned i = 0; i < kernels.size(); i++) {
     
    9376        iBuilder->SetInsertPoint(segmentLoopBody[i]);
    9477        kernels[i]->createDoSegmentCall(instancePtrs[i], segmentBlocks);
    95         if (i == kernels.size() - 1) break;
     78        if (i == last_kernel) break;
    9679        iBuilder->CreateBr(segmentWait[i+1]);
    9780    }
    9881   
    99     remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, hypersegmentBytes), segmentLoopBody[kernels.size()-1]);
    100     segNo->addIncoming(iBuilder->CreateAdd(segNo, ConstantInt::get(size_ty, threadNum)), segmentLoopBody[kernels.size()-1]);
    101     iBuilder->CreateBr(segmentLoop);
    102 
    103     // Now we may have a partial segment, or we may be completely done
    104     // because the last segment was handled by a previous thread in the group.
    105     iBuilder->SetInsertPoint(finalSegmentLoopExit);
    106     Value * alreadyDone = iBuilder->CreateICmpSLT(remainingBytes, myOffset);
    107     Value * remainingForMe = iBuilder->CreateSub(remainingBytes, myOffset);
    108     Value * blocksToDo = iBuilder->CreateUDiv(remainingForMe, blockSize);
    109     iBuilder->CreateCondBr(alreadyDone, exitThreadBlock, partialSegmentWait[0]);
    110 
    111     // Full Block Pipeline loop
    112     for (unsigned i = 0; i < kernels.size(); i++) {
    113         iBuilder->SetInsertPoint(partialSegmentWait[i]);
    114         Value * processedSegmentCount = kernels[i]->getLogicalSegmentNo(instancePtrs[i]);
    115         Value * cond = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
    116         iBuilder->CreateCondBr(cond, partialSegmentLoopBody[i], partialSegmentWait[i]);
    117 
    118         iBuilder->SetInsertPoint(partialSegmentLoopBody[i]);
    119         kernels[i]->createDoSegmentCall(instancePtrs[i], blocksToDo);
    120         kernels[i]->createFinalBlockCall(instancePtrs[i], iBuilder->CreateURem(remainingForMe, blockSize));
    121         if (i == kernels.size() - 1) break;
    122         iBuilder->CreateBr(partialSegmentWait[i+1]);
    123     }
    124     iBuilder->CreateBr(exitThreadBlock);
    125 
     82    segNo->addIncoming(iBuilder->CreateAdd(segNo, ConstantInt::get(size_ty, threadNum)), segmentLoopBody[last_kernel]);
     83    Value * endSignal = kernels[last_kernel]->getTerminationSignal(instancePtrs[last_kernel]);
     84    iBuilder->CreateCondBr(endSignal, exitThreadBlock, segmentLoop);
     85   
    12686    iBuilder->SetInsertPoint(exitThreadBlock);
    12787    Value * nullVal = Constant::getNullValue(voidPtrTy);
     
    244204
    245205    // Create the basic blocks for the loop.
    246     BasicBlock * segmentCondBlock = nullptr;
    247     BasicBlock * segmentBodyBlock = nullptr;
    248     if (segmentSize > 1) {
    249         segmentCondBlock = BasicBlock::Create(iBuilder->getContext(), "segmentCond", main, 0);
    250         segmentBodyBlock = BasicBlock::Create(iBuilder->getContext(), "segmentBody", main, 0);
    251     }
    252     BasicBlock * fullCondBlock = BasicBlock::Create(iBuilder->getContext(), "fullCond", main, 0);
    253     BasicBlock * fullBodyBlock = BasicBlock::Create(iBuilder->getContext(), "fullBody", main, 0);
    254     BasicBlock * finalBlock = BasicBlock::Create(iBuilder->getContext(), "final", main, 0);
    255     BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exit", main, 0);
    256    
    257    
    258     Value * initialBufferSize = nullptr;
    259     Value * initialBlockNo = nullptr;
    260     BasicBlock * initialBlock = nullptr;
    261    
    262     if (segmentSize > 1) {
    263         iBuilder->CreateBr(segmentCondBlock);
    264         iBuilder->SetInsertPoint(segmentCondBlock);
    265         PHINode * remainingBytes = iBuilder->CreatePHI(size_ty, 2, "remainingBytes");
    266         remainingBytes->addIncoming(fileSize, entryBlock);
    267         PHINode * blockNo = iBuilder->CreatePHI(size_ty, 2, "blockNo");
    268         blockNo->addIncoming(ConstantInt::get(size_ty, 0), entryBlock);
    269        
    270         Constant * const step = ConstantInt::get(size_ty, iBuilder->getStride() * segmentSize);
    271         Value * segmentCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
    272         iBuilder->CreateCondBr(segmentCondTest, fullCondBlock, segmentBodyBlock);
    273        
    274         iBuilder->SetInsertPoint(segmentBodyBlock);
    275         Value * segBlocks = ConstantInt::get(size_ty, segmentSize);
    276         for (unsigned i = 0; i < kernels.size(); i++) {
    277             kernels[i]->createDoSegmentCall(instances[i], segBlocks);
    278         }
    279         remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), segmentBodyBlock);
    280         blockNo->addIncoming(iBuilder->CreateAdd(blockNo, segBlocks), segmentBodyBlock);
    281        
    282         iBuilder->CreateBr(segmentCondBlock);
    283         initialBufferSize = remainingBytes;
    284         initialBlockNo = blockNo;
    285         initialBlock = segmentCondBlock;
    286     } else {
    287         initialBufferSize = fileSize;
    288         initialBlockNo = ConstantInt::get(size_ty, 0);
    289         initialBlock = entryBlock;
    290         iBuilder->CreateBr(fullCondBlock);
    291     }
    292    
    293     iBuilder->SetInsertPoint(fullCondBlock);
    294     PHINode * remainingBytes = iBuilder->CreatePHI(size_ty, 2, "remainingBytes");
    295     remainingBytes->addIncoming(initialBufferSize, initialBlock);
    296     PHINode * blockNo = iBuilder->CreatePHI(size_ty, 2, "blockNo");
    297     blockNo->addIncoming(initialBlockNo, initialBlock);
    298    
    299     Constant * const step = ConstantInt::get(size_ty, iBuilder->getStride());
    300     Value * fullCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
    301     iBuilder->CreateCondBr(fullCondTest, finalBlock, fullBodyBlock);
    302    
    303     // Full Block Pipeline loop
    304     iBuilder->SetInsertPoint(fullBodyBlock);
    305     for (unsigned i = 0; i < kernels.size(); i++) {
    306         kernels[i]->createDoSegmentCall(instances[i], ConstantInt::get(size_ty, 1));
    307     }
    308    
    309     remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), fullBodyBlock);
    310     blockNo->addIncoming(iBuilder->CreateAdd(blockNo, ConstantInt::get(size_ty, 1)), fullBodyBlock);
    311     iBuilder->CreateBr(fullCondBlock);
    312    
    313     iBuilder->SetInsertPoint(finalBlock);
    314     for (unsigned i = 0; i < kernels.size(); i++) {
    315         kernels[i]->createFinalBlockCall(instances[i], remainingBytes);
    316     }
    317     iBuilder->CreateBr(exitBlock);
     206    BasicBlock * segmentBlock = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", main, 0);
     207    BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exitBlock", main, 0);
     208    iBuilder->CreateBr(segmentBlock);
     209    iBuilder->SetInsertPoint(segmentBlock);
     210    Constant * segBlocks = ConstantInt::get(size_ty, segmentSize * iBuilder->getStride() / iBuilder->getBitBlockWidth());
     211    for (unsigned i = 0; i < kernels.size(); i++) {
     212        kernels[i]->createDoSegmentCall(instances[i], segBlocks);
     213    }
     214    Value * endSignal = kernels[kernels.size()-1]->getTerminationSignal(instances[kernels.size()-1]);
     215    iBuilder->CreateCondBr(endSignal, exitBlock, segmentBlock);
    318216    iBuilder->SetInsertPoint(exitBlock);
    319217
Note: See TracChangeset for help on using the changeset viewer.