Ignore:
Timestamp:
Oct 11, 2016, 10:40:35 AM (3 years ago)
Author:
cameron
Message:

The doSegment method now handles partial/final segments.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5185 r5188  
    167167    BasicBlock * blockLoopBody = BasicBlock::Create(iBuilder->getContext(), "blockLoopBody", doSegmentFunction, 0);
    168168    BasicBlock * blocksDone = BasicBlock::Create(iBuilder->getContext(), "blocksDone", doSegmentFunction, 0);
     169    BasicBlock * checkFinalBlock = BasicBlock::Create(iBuilder->getContext(), "checkFinalBlock", doSegmentFunction, 0);
     170    BasicBlock * callFinalBlock = BasicBlock::Create(iBuilder->getContext(), "callFinalBlock", doSegmentFunction, 0);
     171    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0);
    169172    Type * const size_ty = iBuilder->getSizeTy();
    170173    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
     
    175178    Value * blocksToDo = &*(args);
    176179    Value * segmentNo = getLogicalSegmentNo(self);
     180   
    177181    std::vector<Value *> inbufProducerPtrs;
    178    
     182    std::vector<Value *> endSignalPtrs;
    179183    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    180184        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].ssName);
    181185        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
    182     }
    183    
     186        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->hasEndOfInputPtr(ssStructPtr));
     187    }
     188   
     189    std::vector<Value *> producerPos;
    184190    /* Determine the actually available data examining all input stream sets. */
    185     LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[0], sizeof(size_t));
    186     producerPos->setOrdering(AtomicOrdering::Acquire);
    187     Value * availablePos = producerPos;
     191    LoadInst * p = iBuilder->CreateAlignedLoad(inbufProducerPtrs[0], sizeof(size_t));
     192    p->setOrdering(AtomicOrdering::Acquire);
     193    producerPos.push_back(p);
     194    Value * availablePos = producerPos[0];
    188195    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
    189         LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], sizeof(size_t));
    190         producerPos->setOrdering(AtomicOrdering::Acquire);
     196        LoadInst * p = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], sizeof(size_t));
     197        p->setOrdering(AtomicOrdering::Acquire);
     198        producerPos.push_back(p);
    191199        /* Set the available position to be the minimum of availablePos and producerPos. */
    192         availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, producerPos), availablePos, producerPos);
     200        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
    193201    }
    194202    Value * processed = getProcessedItemCount(self);
     
    199207    Value * blocksAvail = iBuilder->CreateUDiv(itemsAvail, stride);
    200208    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
    201     blocksToDo = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocksToDo, blocksAvail), blocksToDo, blocksAvail);
     209    Value * lessThanFullSegment = iBuilder->CreateICmpULT(blocksAvail, blocksToDo);
     210    blocksToDo = iBuilder->CreateSelect(lessThanFullSegment, blocksAvail, blocksToDo);
    202211    //iBuilder->CallPrintInt(mKernelName + "_blocksAvail", blocksAvail);
    203212    iBuilder->CreateBr(blockLoopCond);
     
    220229    processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(blocksToDo, stride));
    221230    setProcessedItemCount(self, processed);
     231    iBuilder->CreateCondBr(lessThanFullSegment, checkFinalBlock, segmentDone);
     232   
     233    iBuilder->SetInsertPoint(checkFinalBlock);
     234   
     235    /* We had less than a full segment of data; we may have reached the end of input
     236       on one of the stream sets.  */
     237   
     238    Value * endOfInput = iBuilder->CreateLoad(endSignalPtrs[0]);
     239    if (endSignalPtrs.size() > 1) {
     240        /* If there is more than one input stream set, then we need to confirm that at least
     241           one of them has both its endSignal set and its producer position equal to availablePos. */
     242        endOfInput = iBuilder->CreateAnd(endOfInput, iBuilder->CreateICmpEQ(availablePos, producerPos[0]));
     243        for (unsigned i = 1; i < endSignalPtrs.size(); i++) {
     244            Value * e = iBuilder->CreateAnd(iBuilder->CreateLoad(endSignalPtrs[i]), iBuilder->CreateICmpEQ(availablePos, producerPos[i]));
     245            endOfInput = iBuilder->CreateOr(endOfInput, e);
     246        }
     247    }
     248    iBuilder->CreateCondBr(endOfInput, callFinalBlock, segmentDone);
     249   
     250    iBuilder->SetInsertPoint(callFinalBlock);
     251   
     252    Value * remainingItems = iBuilder->CreateURem(availablePos, stride);
     253    createFinalBlockCall(self, remainingItems);
     254    setProcessedItemCount(self, availablePos);
     255   
     256    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     257        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName);
     258        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
     259    }
     260   
     261    iBuilder->CreateBr(segmentDone);
     262   
     263    iBuilder->SetInsertPoint(segmentDone);
    222264    Value * produced = getProducedItemCount(self);
    223265#ifndef NDEBUG
     
    465507    iBuilder->SetInsertPoint(endSignalCheckBlock);
    466508   
    467     LoadInst * endSignal = iBuilder->CreateAlignedLoad(endSignalPtrs[0], sizeof(size_t));
    468     // iBuilder->CallPrintInt(name + ":endSignal", endSignal);
    469     endSignal->setOrdering(AtomicOrdering::Acquire);
     509    LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]);
    470510    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
    471         LoadInst * endSignal_next = iBuilder->CreateAlignedLoad(endSignalPtrs[i], sizeof(size_t));
    472         endSignal_next->setOrdering(AtomicOrdering::Acquire);
     511        LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]);
    473512        iBuilder->CreateAnd(endSignal, endSignal_next);
    474513    }
    475514       
    476     iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(endSignal, ConstantInt::get(iBuilder->getInt8Ty(), 1)), endBlock, inputCheckBlock);
     515    iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
    477516   
    478517    iBuilder->SetInsertPoint(doSegmentBlock);
Note: See TracChangeset for help on using the changeset viewer.