Ignore:
Timestamp:
Jan 12, 2017, 7:03:38 PM (2 years ago)
Author:
cameron
Message:

finalSegment kernel methods initial check-in

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5252 r5257  
    109109    generateFinalBlockMethod(); // possibly overridden by the KernelBuilder subtype
    110110    generateDoSegmentMethod();
     111    generateFinalSegmentMethod();
    111112
    112113    // Implement the accumulator get functions
     
    169170}
    170171
     172
    171173//  The default doSegment method dispatches to the doBlock routine for
    172174//  each block of the given number of blocksToDo, and then updates counts.
     
    180182    BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), "strideLoopBody", doSegmentFunction, 0);
    181183    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", doSegmentFunction, 0);
    182     BasicBlock * checkFinalStride = BasicBlock::Create(iBuilder->getContext(), "checkFinalStride", doSegmentFunction, 0);
    183     BasicBlock * checkEndSignals = BasicBlock::Create(iBuilder->getContext(), "checkEndSignals", doSegmentFunction, 0);
    184     BasicBlock * callFinalBlock = BasicBlock::Create(iBuilder->getContext(), "callFinalBlock", doSegmentFunction, 0);
    185184    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0);
    186185    BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), "finalExit", doSegmentFunction, 0);
     
    214213    Value * processed = getProcessedItemCount(self, mStreamSetInputs[0].name);
    215214    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
    216 //#ifndef NDEBUG
    217 //    iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail);
    218 //#endif
     215#ifndef NDEBUG
     216    iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail);
     217#endif
    219218    Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
    220219    Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
     
    242241    processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(stridesToDo, stride));
    243242    setProcessedItemCount(self, mStreamSetInputs[0].name, processed);
    244     iBuilder->CreateCondBr(lessThanFullSegment, checkFinalStride, segmentDone);
    245    
    246     iBuilder->SetInsertPoint(checkFinalStride);
    247    
    248     /* We had less than a full segment of data; we may have reached the end of input
    249        on one of the stream sets.  */
    250    
    251     Value * alreadyDone = getTerminationSignal(self);
    252     iBuilder->CreateCondBr(alreadyDone, finalExit, checkEndSignals);
    253    
    254     iBuilder->SetInsertPoint(checkEndSignals);
    255     Value * endOfInput = iBuilder->CreateLoad(endSignalPtrs[0]);
    256     if (endSignalPtrs.size() > 1) {
    257         /* If there is more than one input stream set, then we need to confirm that one of
    258            them has both the endSignal set and the length = to availablePos. */
    259         endOfInput = iBuilder->CreateAnd(endOfInput, iBuilder->CreateICmpEQ(availablePos, producerPos[0]));
    260         for (unsigned i = 1; i < endSignalPtrs.size(); i++) {
    261             Value * e = iBuilder->CreateAnd(iBuilder->CreateLoad(endSignalPtrs[i]), iBuilder->CreateICmpEQ(availablePos, producerPos[i]));
    262             endOfInput = iBuilder->CreateOr(endOfInput, e);
    263         }
    264     }
    265     iBuilder->CreateCondBr(endOfInput, callFinalBlock, segmentDone);
    266    
    267     iBuilder->SetInsertPoint(callFinalBlock);
    268    
    269     Value * remainingItems = iBuilder->CreateSub(availablePos, processed);
    270     createFinalBlockCall(self, remainingItems);
    271     setProcessedItemCount(self, mStreamSetInputs[0].name, availablePos);
    272    
    273     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    274         Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    275         mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
    276     }
    277     setTerminationSignal(self);
    278243    iBuilder->CreateBr(segmentDone);
    279    
    280244    iBuilder->SetInsertPoint(segmentDone);
    281 //#ifndef NDEBUG
    282 //    iBuilder->CallPrintInt(mKernelName + "_produced", produced);
    283 //#endif
     245#ifndef NDEBUG
     246    iBuilder->CallPrintInt(mKernelName + "_processed", processed);
     247#endif
    284248    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    285249        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
     
    294258    iBuilder->restoreIP(savePoint);
    295259}
     260
     261void KernelBuilder::generateFinalSegmentMethod() const {
            // Emits the IR body of the module function named
            // mKernelName + finalSegment_suffix.  The emitted function
            //   1. computes how many items are available across all input stream sets,
            //   2. calls the doSegment routine if at least one full stride is available,
            //   3. calls the final-block routine on whatever remains of input 0,
            //   4. publishes the produced item count of every output stream set,
            // and then returns void.  The builder's insert point is saved on entry
            // and restored on exit.
     262    auto savePoint = iBuilder->saveIP();
     263    Module * m = iBuilder->getModule();
            // NOTE(review): the result of getFunction is used without a null check;
            // presumably the function was declared earlier -- confirm.
     264    Function * finalSegmentFunction = m->getFunction(mKernelName + finalSegment_suffix);
     265    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", finalSegmentFunction, 0));
     266    BasicBlock * doStrides = BasicBlock::Create(iBuilder->getContext(), "doStrides", finalSegmentFunction, 0);
     267    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", finalSegmentFunction, 0);
     268    Type * const size_ty = iBuilder->getSizeTy();
            // stride is the builder's stride value; strideBlocks is that value
            // divided by the bit block width.
     269    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
     270    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
            // The emitted function's first two arguments: the kernel instance
            // (self) and the requested number of blocks.
     271    Function::arg_iterator args = finalSegmentFunction->arg_begin();
     272    Value * self = &*(args++);
     273    Value * blocksToDo = &*(args);
            // Collect, for each input stream set, the pointers to its producer
            // position and its end-of-input signal.
            // NOTE(review): endSignalPtrs is filled here but is not read again
            // anywhere in this function.
     274    std::vector<Value *> inbufProducerPtrs;
     275    std::vector<Value *> endSignalPtrs;
     276    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     277        Value * param = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
     278        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(param));
     279        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(param));
     280    }
     281   
     282    std::vector<Value *> producerPos;
     283    /* Determine the actually available data examining all input stream sets. */
            // NOTE(review): index 0 is accessed unconditionally, so this assumes
            // the kernel has at least one input stream set -- confirm.
     284    LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[0]);
     285    producerPos.push_back(p);
     286    Value * availablePos = producerPos[0];
     287    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
     288        LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
     289        producerPos.push_back(p);
     290        /* Set the available position to be the minimum of availablePos and producerPos. */
     291        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
     292    }
            // Items available = minimum producer position minus the processed
            // item count recorded for the first input stream set.
     293    Value * processed = getProcessedItemCount(self, mStreamSetInputs[0].name);
     294    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
     295#ifndef NDEBUG
     296    iBuilder->CallPrintInt(mKernelName + "_itemsAvail final", itemsAvail);
     297#endif
     298    Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
     299    Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
     300    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
     301    Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo);
     302    stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo);
            // Branch to doStrides only when the clamped stride count is nonzero.
     303    Value * notDone = iBuilder->CreateICmpUGT(stridesToDo, ConstantInt::get(size_ty, 0));
     304    iBuilder->CreateCondBr(notDone, doStrides, stridesDone);
     305   
     306    iBuilder->SetInsertPoint(doStrides);
            // NOTE(review): the call is given the original blocksToDo argument,
            // not the clamped stridesToDo computed above; stridesToDo is used
            // here only to decide whether to make the call -- confirm intended.
     307    createDoSegmentCall(self, blocksToDo);
     308    iBuilder->CreateBr(stridesDone);
     309   
     310    iBuilder->SetInsertPoint(stridesDone);
     311    /* Now at most a partial block remains. */
     312   
            // Re-read the processed count, since the doSegment call (when taken)
            // may have advanced it.
     313    processed = getProcessedItemCount(self, mStreamSetInputs[0].name);   
            // NOTE(review): the remainder is measured against producerPos[0]
            // (the first input), not against the minimum availablePos computed
            // above -- confirm this is intended when there are several inputs.
     314    Value * remainingItems = iBuilder->CreateSub(producerPos[0], processed);
     315    //iBuilder->CallPrintInt(mKernelName + " remainingItems", remainingItems);
     316       
     317    createFinalBlockCall(self, remainingItems);
            // Record the remaining items as processed for the first input.
     318    processed = iBuilder->CreateAdd(processed, remainingItems);
     319    setProcessedItemCount(self, mStreamSetInputs[0].name, processed);
     320       
     321#ifndef NDEBUG
     322    iBuilder->CallPrintInt(mKernelName + "_processed final", processed);
     323#endif
            // For every output stream set, store its produced item count into the
            // buffer's producer position with an atomic release store.
     324    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     325        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
     326        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
     327        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
     328        iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
     329    }
     330
     331    iBuilder->CreateRetVoid();
     332
     333    iBuilder->restoreIP(savePoint);
     334}
     335
     336
    296337
    297338ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
Note: See TracChangeset for help on using the changeset viewer.