Ignore:
Timestamp:
Oct 1, 2016, 12:03:14 PM (3 years ago)
Author:
cameron
Message:

Kernel/pipeline progress: synchronize with logicalSegmentNo

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5171 r5174  
    1010#include <llvm/Support/raw_ostream.h>
    1111#include <llvm/IR/TypeBuilder.h>
     12#include <llvm/Support/ErrorHandling.h>
    1213#include <toolchain.h>
    1314
     
    2627void KernelBuilder::addScalar(Type * t, std::string scalarName) {
    2728    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    28         throw std::runtime_error("Illegal addition of kernel field after kernel state finalized: " + scalarName);
     29        llvm::report_fatal_error("Illegal addition of kernel field after kernel state finalized: " + scalarName);
    2930    }
    3031    unsigned index = mKernelFields.size();
     
    3637    unsigned blockSize = iBuilder->getBitBlockWidth();
    3738    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
    38         throw std::runtime_error("Kernel preparation: Incorrect number of input buffers");
     39        llvm::report_fatal_error("Kernel preparation: Incorrect number of input buffers");
    3940    }
    4041    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
    41         throw std::runtime_error("Kernel preparation: Incorrect number of output buffers");
     42        llvm::report_fatal_error("Kernel preparation: Incorrect number of output buffers");
    4243    }
    4344    addScalar(iBuilder->getSizeTy(), blockNoScalar);
     45    addScalar(iBuilder->getSizeTy(), logicalSegmentNoScalar);
     46    addScalar(iBuilder->getSizeTy(), processedItemCount);
     47    addScalar(iBuilder->getSizeTy(), producedItemCount);
     48    addScalar(iBuilder->getInt1Ty(), terminationSignal);
    4449    int streamSetNo = 0;
    4550    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    4651        size_t bufferSize = mStreamSetInputBuffers[i]->getBufferSize() * blockSize;
    4752        if (!(mStreamSetInputBuffers[i]->getBufferStreamSetType() == mStreamSetInputs[i].ssType)) {
    48              throw std::runtime_error("Kernel preparation: Incorrect input buffer type");
     53             llvm::report_fatal_error("Kernel preparation: Incorrect input buffer type");
    4954        }
    50         if ((bufferSize > 0) && (bufferSize < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
     55        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
    5156             errs() << "buffer size = " << mStreamSetInputBuffers[i]->getBufferSize() << "\n";
    52              throw std::runtime_error("Kernel preparation: Buffer size too small.");
     57             llvm::report_fatal_error("Kernel preparation: Buffer size too small.");
    5358        }
    54 
    5559        mScalarInputs.push_back(ScalarBinding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].ssName + basePtrSuffix});
    5660        mStreamSetNameMap.emplace(mStreamSetInputs[i].ssName, streamSetNo);
     
    5963    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    6064        if (!(mStreamSetOutputBuffers[i]->getBufferStreamSetType() == mStreamSetOutputs[i].ssType)) {
    61              throw std::runtime_error("Kernel preparation: Incorrect output buffer type");
     65             llvm::report_fatal_error("Kernel preparation: Incorrect output buffer type");
    6266        }
    6367        mScalarInputs.push_back(ScalarBinding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].ssName + basePtrSuffix});
     
    146150}
    147151
    148 //  The default doSegment method simply dispatches to the doBlock routine.
     152void KernelBuilder::generateDoBlockLogic(Value * self, Value * blockNo) {
     153    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
     154    iBuilder->CreateCall(doBlockFunction, {self});
     155}
     156
     157//  The default doSegment method dispatches to the doBlock routine for
     158//  each block of the given number of blocksToDo, and then updates counts.
    149159void KernelBuilder::generateDoSegmentMethod() {
    150160    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
    151161    Module * m = iBuilder->getModule();
    152     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    153162    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
    154163    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
     
    158167    BasicBlock * blocksDone = BasicBlock::Create(iBuilder->getContext(), "blocksDone", doSegmentFunction, 0);
    159168    Type * const size_ty = iBuilder->getSizeTy();
     169    Value * stride = ConstantInt::get(size_ty, iBuilder->getStride());
     170    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
    160171   
    161172    Function::arg_iterator args = doSegmentFunction->arg_begin();
    162173    Value * self = &*(args++);
    163174    Value * blocksToDo = &*(args);
    164    
     175
     176    Value * segmentNo = getLogicalSegmentNo(self);
    165177    iBuilder->CreateBr(blockLoopCond);
    166178
     
    173185    iBuilder->SetInsertPoint(blockLoopBody);
    174186    Value * blockNo = getScalarField(self, blockNoScalar);   
    175     iBuilder->CreateCall(doBlockFunction, {self});
    176     setBlockNo(self, iBuilder->CreateAdd(blockNo, ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth())));
    177     blocksRemaining->addIncoming(iBuilder->CreateSub(blocksRemaining, ConstantInt::get(size_ty, 1)), blockLoopBody);
     187    generateDoBlockLogic(self, blockNo);
     188    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
     189    blocksRemaining->addIncoming(iBuilder->CreateSub(blocksRemaining, strideBlocks), blockLoopBody);
    178190    iBuilder->CreateBr(blockLoopCond);
    179191   
    180192    iBuilder->SetInsertPoint(blocksDone);
     193    setProcessedItemCount(self, iBuilder->CreateAdd(getProcessedItemCount(self), iBuilder->CreateMul(blocksToDo, stride)));
     194    // Must be the last action, for synchronization.
     195    setLogicalSegmentNo(self, iBuilder->CreateAdd(segmentNo, ConstantInt::get(size_ty, 1)));
     196
    181197    iBuilder->CreateRetVoid();
    182198    iBuilder->restoreIP(savePoint);
     
    186202    const auto f = mInternalStateNameMap.find(fieldName);
    187203    if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
    188         throw std::runtime_error("Kernel does not contain internal state: " + fieldName);
     204        llvm::report_fatal_error("Kernel does not contain internal state: " + fieldName);
    189205    }
    190206    return iBuilder->getInt32(f->second);
     
    203219}
    204220
     221Value * KernelBuilder::getLogicalSegmentNo(Value * self) {
     222    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
     223    LoadInst * segNo = iBuilder->CreateAlignedLoad(ptr, sizeof(size_t));
     224    segNo->setOrdering(Acquire);
     225    return segNo;
     226}
     227
     228Value * KernelBuilder::getProcessedItemCount(Value * self) {
     229    return getScalarField(self, processedItemCount);
     230}
     231
     232Value * KernelBuilder::getProducedItemCount(Value * self) {
     233    return getScalarField(self, producedItemCount);
     234}
     235
     236//  By default, kernels do not terminate early. 
     237Value * KernelBuilder::getTerminationSignal(Value * self) {
     238    return ConstantInt::getNullValue(iBuilder->getInt1Ty());
     239}
     240
     241
     242void KernelBuilder::setLogicalSegmentNo(Value * self, Value * newCount) {
     243    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
     244    iBuilder->CreateAlignedStore(newCount, ptr, sizeof(size_t))->setOrdering(Release);
     245}
     246
     247void KernelBuilder::setProcessedItemCount(Value * self, Value * newCount) {
     248    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(processedItemCount)});
     249    iBuilder->CreateStore(newCount, ptr);
     250}
     251
     252void KernelBuilder::setProducedItemCount(Value * self, Value * newCount) {
     253    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(producedItemCount)});
     254    iBuilder->CreateStore(newCount, ptr);
     255}
     256
     257void KernelBuilder::setTerminationSignal(Value * self, Value * newFieldVal) {
     258    llvm::report_fatal_error("This kernel type does not support setTerminationSignal.");
     259}
     260
     261
    205262Value * KernelBuilder::getBlockNo(Value * self) {
    206263    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
    207     LoadInst * blockNo = iBuilder->CreateAlignedLoad(ptr, 8);
    208     blockNo->setOrdering(Acquire);
     264    LoadInst * blockNo = iBuilder->CreateLoad(ptr);
    209265    return blockNo;
    210266}
     
    212268void KernelBuilder::setBlockNo(Value * self, Value * newFieldVal) {
    213269    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
    214     iBuilder->CreateAlignedStore(newFieldVal, ptr, 8)->setOrdering(Release);
     270    iBuilder->CreateStore(newFieldVal, ptr);
    215271}
    216272
     
    221277        if (arg->getName() == paramName) return arg;
    222278    }
    223     throw std::runtime_error("Method does not have parameter: " + paramName);
     279    llvm::report_fatal_error("Method does not have parameter: " + paramName);
    224280}
    225281
     
    227283    const auto f = mStreamSetNameMap.find(ssName);
    228284    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
    229         throw std::runtime_error("Kernel does not contain stream set: " + ssName);
     285        llvm::report_fatal_error("Kernel does not contain stream set: " + ssName);
    230286    }
    231287    return f->second;
     
    273329    Function * initMethod = m->getFunction(initFnName);
    274330    if (!initMethod) {
    275         throw std::runtime_error("Cannot find " + initFnName);
     331        llvm::report_fatal_error("Cannot find " + initFnName);
    276332    }
    277333    iBuilder->CreateCall(initMethod, init_args);
     
    306362        Value * basePtr = getStreamSetBasePtr(self, mStreamSetInputs[i].ssName);
    307363        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(basePtr));
    308         inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getComsumerPosPtr(basePtr));
     364        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(basePtr));
    309365        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->hasEndOfInputPtr(basePtr));
    310366    }
     
    312368        Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
    313369        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(basePtr));
    314         outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getComsumerPosPtr(basePtr));
     370        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(basePtr));
    315371    }
    316372
     
    339395    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
    340396    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    341         LoadInst * producerPos = iBuilder->CreateAlignedLoad(outbufProducerPtrs[i], 8);
     397        LoadInst * producerPos = iBuilder->CreateAlignedLoad(outbufProducerPtrs[i], sizeof(size_t));
    342398        producerPos->setOrdering(Acquire);
    343399        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
    344         LoadInst * consumerPos = iBuilder->CreateAlignedLoad(outbufConsumerPtrs[i], 8);
     400        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(outbufConsumerPtrs[i], sizeof(size_t));
    345401        consumerPos->setOrdering(Acquire);
    346402        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
     
    352408    iBuilder->SetInsertPoint(inputCheckBlock);
    353409
     410    Value * requiredSize = segSize;
     411    if (mLookAheadPositions > 0) {
     412        requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
     413    }
    354414    waitCondTest = ConstantInt::get(int1ty, 1);
    355415    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
    356         LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], 8);
     416        LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], sizeof(size_t));
    357417        producerPos->setOrdering(Acquire);
    358418        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
    359         LoadInst * consumerPos = iBuilder->CreateAlignedLoad(inbufConsumerPtrs[i], 8);
     419        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(inbufConsumerPtrs[i], sizeof(size_t));
    360420        consumerPos->setOrdering(Acquire);
    361421        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
    362         waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, segSize), producerPos));
     422        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
    363423    }
    364424
     
    367427    iBuilder->SetInsertPoint(endSignalCheckBlock);
    368428   
    369     LoadInst * endSignal = iBuilder->CreateAlignedLoad(endSignalPtrs[0], 8);
     429    LoadInst * endSignal = iBuilder->CreateAlignedLoad(endSignalPtrs[0], sizeof(size_t));
    370430    // iBuilder->CallPrintInt(name + ":endSignal", endSignal);
    371431    endSignal->setOrdering(Acquire);
    372432    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
    373         LoadInst * endSignal_next = iBuilder->CreateAlignedLoad(endSignalPtrs[i], 8);
     433        LoadInst * endSignal_next = iBuilder->CreateAlignedLoad(endSignalPtrs[i], sizeof(size_t));
    374434        endSignal_next->setOrdering(Acquire);
    375435        iBuilder->CreateAnd(endSignal, endSignal_next);
     
    384444    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
    385445        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
    386         iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], 8)->setOrdering(Release);
    387     }
     446        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], sizeof(size_t))->setOrdering(Release);
     447    }
     448   
     449    Value * produced = getProducedItemCount(self);
    388450    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    389         Value * producerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(outbufProducerPtrs[i]), segSize);
    390         iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], 8)->setOrdering(Release);
    391     }
    392    
     451        iBuilder->CreateAlignedStore(produced, outbufProducerPtrs[i], sizeof(size_t))->setOrdering(Release);
     452    }
     453   
     454    Value * earlyEndSignal = getTerminationSignal(self);
     455    if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
     456        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
     457        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
     458
     459        iBuilder->SetInsertPoint(earlyEndBlock);
     460        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     461            Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
     462            mStreamSetOutputBuffers[i]->setEndOfInput(basePtr);
     463        }       
     464    }
    393465    iBuilder->CreateBr(outputCheckBlock);
    394466     
     
    415487    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
    416488        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
    417         iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], 8)->setOrdering(Release);
     489        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], sizeof(size_t))->setOrdering(Release);
    418490    }
    419491    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    420         Value * producerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(outbufProducerPtrs[i]), remainingBytes);
    421         iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], 8)->setOrdering(Release);
     492       
     493        Value * produced = iBuilder->CreateAdd(iBuilder->CreateLoad(outbufProducerPtrs[i]), remainingBytes);
     494        iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], sizeof(size_t))->setOrdering(Release);
    422495    }
    423496
Note: See TracChangeset for help on using the changeset viewer.