Ignore:
Timestamp:
Oct 15, 2016, 11:22:19 PM (3 years ago)
Author:
cameron
Message:

Restructuring pipeline control to use termination signals

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5175 r5194  
    3838     // Create the basic blocks for the thread function.
    3939    BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc, 0);
    40     BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentCond", threadFunc, 0);
    41     BasicBlock * finalSegmentLoopExit = BasicBlock::Create(iBuilder->getContext(), "partialSegmentCond", threadFunc, 0);
     40    BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", threadFunc, 0);
    4241    BasicBlock * exitThreadBlock = BasicBlock::Create(iBuilder->getContext(), "exitThread", threadFunc, 0);
    4342    std::vector<BasicBlock *> segmentWait;
    4443    std::vector<BasicBlock *> segmentLoopBody;
    45     std::vector<BasicBlock *> partialSegmentWait;
    46     std::vector<BasicBlock *> partialSegmentLoopBody;
    4744    for (unsigned i = 0; i < kernels.size(); i++) {
    4845        segmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), "segmentWait"+std::to_string(i), threadFunc, 0));
    4946        segmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), "segmentWait"+std::to_string(i), threadFunc, 0));
    50         partialSegmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), "partialSegmentWait"+std::to_string(i), threadFunc, 0));
    51         partialSegmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), "partialSegmentLoopBody"+std::to_string(i), threadFunc, 0));
    5247    }
    5348
    5449    iBuilder->SetInsertPoint(entryBlock);
    5550    Value * sharedStruct = iBuilder->CreateBitCast(input, PointerType::get(sharedStructType, 0));
    56     Value * myThreadId = ConstantInt::get(size_ty, id);
    57     Value * fileSize = iBuilder->CreateLoad(iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
     51    Constant * myThreadId = ConstantInt::get(size_ty, id);
    5852    std::vector<Value *> instancePtrs;
    5953    for (unsigned i = 0; i < kernels.size(); i++) {
     
    6559    int segmentSize = codegen::SegmentSize;
    6660    Constant * segmentBlocks = ConstantInt::get(size_ty, segmentSize);
    67     Constant * segmentBytes = ConstantInt::get(size_ty, iBuilder->getStride() * segmentSize);
    68     Constant * hypersegmentBytes = ConstantInt::get(size_ty, iBuilder->getStride() * segmentSize * threadNum);
    69     Constant * const blockSize = ConstantInt::get(size_ty, iBuilder->getStride());
    70 
    71     Value * myFirstSegNo = myThreadId;  //
    72     // The offset of my starting segment within the thread group hypersegment.
    73     Value * myOffset = iBuilder->CreateMul(segmentBytes, myThreadId);
    74     Value * fullSegLimit = iBuilder->CreateAdd(myOffset, segmentBytes);
    75 
    7661    iBuilder->CreateBr(segmentLoop);
    7762
    7863    iBuilder->SetInsertPoint(segmentLoop);
    79     PHINode * remainingBytes = iBuilder->CreatePHI(size_ty, 2, "remainingBytes");
    80     remainingBytes->addIncoming(fileSize, entryBlock);
    8164    PHINode * segNo = iBuilder->CreatePHI(size_ty, 2, "segNo");
    82     segNo->addIncoming(myFirstSegNo, entryBlock);
    83 
    84     Value * LT_fullSegment = iBuilder->CreateICmpSLT(remainingBytes, fullSegLimit);
    85     iBuilder->CreateCondBr(LT_fullSegment, finalSegmentLoopExit, segmentWait[0]);
     65    segNo->addIncoming(myThreadId, entryBlock);
     66    unsigned last_kernel = kernels.size() - 1;
     67    Value * alreadyDone = kernels[last_kernel]->getTerminationSignal(instancePtrs[last_kernel]);
     68    iBuilder->CreateCondBr(alreadyDone, exitThreadBlock, segmentWait[0]);
    8669
    8770    for (unsigned i = 0; i < kernels.size(); i++) {
     
    9376        iBuilder->SetInsertPoint(segmentLoopBody[i]);
    9477        kernels[i]->createDoSegmentCall(instancePtrs[i], segmentBlocks);
    95         if (i == kernels.size() - 1) break;
     78        if (i == last_kernel) break;
    9679        iBuilder->CreateBr(segmentWait[i+1]);
    9780    }
    9881   
    99     remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, hypersegmentBytes), segmentLoopBody[kernels.size()-1]);
    100     segNo->addIncoming(iBuilder->CreateAdd(segNo, ConstantInt::get(size_ty, threadNum)), segmentLoopBody[kernels.size()-1]);
    101     iBuilder->CreateBr(segmentLoop);
    102 
    103     // Now we may have a partial segment, or we may be completely done
    104     // because the last segment was handled by a previous thread in the group.
    105     iBuilder->SetInsertPoint(finalSegmentLoopExit);
    106     Value * alreadyDone = iBuilder->CreateICmpSLT(remainingBytes, myOffset);
    107     Value * remainingForMe = iBuilder->CreateSub(remainingBytes, myOffset);
    108     Value * blocksToDo = iBuilder->CreateUDiv(remainingForMe, blockSize);
    109     iBuilder->CreateCondBr(alreadyDone, exitThreadBlock, partialSegmentWait[0]);
    110 
    111     // Full Block Pipeline loop
    112     for (unsigned i = 0; i < kernels.size(); i++) {
    113         iBuilder->SetInsertPoint(partialSegmentWait[i]);
    114         Value * processedSegmentCount = kernels[i]->getLogicalSegmentNo(instancePtrs[i]);
    115         Value * cond = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
    116         iBuilder->CreateCondBr(cond, partialSegmentLoopBody[i], partialSegmentWait[i]);
    117 
    118         iBuilder->SetInsertPoint(partialSegmentLoopBody[i]);
    119         kernels[i]->createDoSegmentCall(instancePtrs[i], blocksToDo);
    120         kernels[i]->createFinalBlockCall(instancePtrs[i], iBuilder->CreateURem(remainingForMe, blockSize));
    121         if (i == kernels.size() - 1) break;
    122         iBuilder->CreateBr(partialSegmentWait[i+1]);
    123     }
    124     iBuilder->CreateBr(exitThreadBlock);
    125 
     82    segNo->addIncoming(iBuilder->CreateAdd(segNo, ConstantInt::get(size_ty, threadNum)), segmentLoopBody[last_kernel]);
     83    Value * endSignal = kernels[last_kernel]->getTerminationSignal(instancePtrs[last_kernel]);
     84    iBuilder->CreateCondBr(endSignal, exitThreadBlock, segmentLoop);
     85   
    12686    iBuilder->SetInsertPoint(exitThreadBlock);
    12787    Value * nullVal = Constant::getNullValue(voidPtrTy);
     
    244204
    245205    // Create the basic blocks for the loop.
    246     BasicBlock * segmentCondBlock = nullptr;
    247     BasicBlock * segmentBodyBlock = nullptr;
    248     if (segmentSize > 1) {
    249         segmentCondBlock = BasicBlock::Create(iBuilder->getContext(), "segmentCond", main, 0);
    250         segmentBodyBlock = BasicBlock::Create(iBuilder->getContext(), "segmentBody", main, 0);
    251     }
    252     BasicBlock * fullCondBlock = BasicBlock::Create(iBuilder->getContext(), "fullCond", main, 0);
    253     BasicBlock * fullBodyBlock = BasicBlock::Create(iBuilder->getContext(), "fullBody", main, 0);
    254     BasicBlock * finalBlock = BasicBlock::Create(iBuilder->getContext(), "final", main, 0);
    255     BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exit", main, 0);
    256    
    257    
    258     Value * initialBufferSize = nullptr;
    259     Value * initialBlockNo = nullptr;
    260     BasicBlock * initialBlock = nullptr;
    261    
    262     if (segmentSize > 1) {
    263         iBuilder->CreateBr(segmentCondBlock);
    264         iBuilder->SetInsertPoint(segmentCondBlock);
    265         PHINode * remainingBytes = iBuilder->CreatePHI(size_ty, 2, "remainingBytes");
    266         remainingBytes->addIncoming(fileSize, entryBlock);
    267         PHINode * blockNo = iBuilder->CreatePHI(size_ty, 2, "blockNo");
    268         blockNo->addIncoming(ConstantInt::get(size_ty, 0), entryBlock);
    269        
    270         Constant * const step = ConstantInt::get(size_ty, iBuilder->getStride() * segmentSize);
    271         Value * segmentCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
    272         iBuilder->CreateCondBr(segmentCondTest, fullCondBlock, segmentBodyBlock);
    273        
    274         iBuilder->SetInsertPoint(segmentBodyBlock);
    275         Value * segBlocks = ConstantInt::get(size_ty, segmentSize);
    276         for (unsigned i = 0; i < kernels.size(); i++) {
    277             kernels[i]->createDoSegmentCall(instances[i], segBlocks);
    278         }
    279         remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), segmentBodyBlock);
    280         blockNo->addIncoming(iBuilder->CreateAdd(blockNo, segBlocks), segmentBodyBlock);
    281        
    282         iBuilder->CreateBr(segmentCondBlock);
    283         initialBufferSize = remainingBytes;
    284         initialBlockNo = blockNo;
    285         initialBlock = segmentCondBlock;
    286     } else {
    287         initialBufferSize = fileSize;
    288         initialBlockNo = ConstantInt::get(size_ty, 0);
    289         initialBlock = entryBlock;
    290         iBuilder->CreateBr(fullCondBlock);
    291     }
    292    
    293     iBuilder->SetInsertPoint(fullCondBlock);
    294     PHINode * remainingBytes = iBuilder->CreatePHI(size_ty, 2, "remainingBytes");
    295     remainingBytes->addIncoming(initialBufferSize, initialBlock);
    296     PHINode * blockNo = iBuilder->CreatePHI(size_ty, 2, "blockNo");
    297     blockNo->addIncoming(initialBlockNo, initialBlock);
    298    
    299     Constant * const step = ConstantInt::get(size_ty, iBuilder->getStride());
    300     Value * fullCondTest = iBuilder->CreateICmpULT(remainingBytes, step);
    301     iBuilder->CreateCondBr(fullCondTest, finalBlock, fullBodyBlock);
    302    
    303     // Full Block Pipeline loop
    304     iBuilder->SetInsertPoint(fullBodyBlock);
    305     for (unsigned i = 0; i < kernels.size(); i++) {
    306         kernels[i]->createDoSegmentCall(instances[i], ConstantInt::get(size_ty, 1));
    307     }
    308    
    309     remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), fullBodyBlock);
    310     blockNo->addIncoming(iBuilder->CreateAdd(blockNo, ConstantInt::get(size_ty, 1)), fullBodyBlock);
    311     iBuilder->CreateBr(fullCondBlock);
    312    
    313     iBuilder->SetInsertPoint(finalBlock);
    314     for (unsigned i = 0; i < kernels.size(); i++) {
    315         kernels[i]->createFinalBlockCall(instances[i], remainingBytes);
    316     }
    317     iBuilder->CreateBr(exitBlock);
     206    BasicBlock * segmentBlock = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", main, 0);
     207    BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exitBlock", main, 0);
     208    iBuilder->CreateBr(segmentBlock);
     209    iBuilder->SetInsertPoint(segmentBlock);
     210    Constant * segBlocks = ConstantInt::get(size_ty, segmentSize * iBuilder->getStride() / iBuilder->getBitBlockWidth());
     211    for (unsigned i = 0; i < kernels.size(); i++) {
     212        kernels[i]->createDoSegmentCall(instances[i], segBlocks);
     213    }
     214    Value * endSignal = kernels[kernels.size()-1]->getTerminationSignal(instances[kernels.size()-1]);
     215    iBuilder->CreateCondBr(endSignal, exitBlock, segmentBlock);
    318216    iBuilder->SetInsertPoint(exitBlock);
    319217
Note: See TracChangeset for help on using the changeset viewer.