Changeset 5174 for icGREP


Ignore:
Timestamp:
Oct 1, 2016, 12:03:14 PM (3 years ago)
Author:
cameron
Message:

Kernel/pipeline progress: synchronize with logicalSegmentNo

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5135 r5174  
    117117}
    118118
    119 Value * KernelInterface::createInstance(std::vector<Value *> args,
    120                                         std::vector<StreamSetBuffer *> inputBuffers,
    121                                         std::vector<StreamSetBuffer *> outputBuffers) {
    122     Value * kernelInstance = iBuilder->CreateAlloca(mKernelStateType);
    123     Module * m = iBuilder->getModule();
    124     std::vector<Value *> init_args = {kernelInstance};
    125     for (auto a : args) {
    126         init_args.push_back(a);
    127     }
    128     for (auto b : inputBuffers) {
    129         init_args.push_back(b->getStreamSetStructPtr());
    130     }
    131     for (auto b : outputBuffers) {
    132         init_args.push_back(b->getStreamSetStructPtr());
    133     }
    134     std::string initFnName = mKernelName + init_suffix;
    135     Function * initMethod = m->getFunction(initFnName);
    136     if (!initMethod) {
    137         throw std::runtime_error("Cannot find " + initFnName);
    138     }
    139     iBuilder->CreateCall(initMethod, init_args);
    140     return kernelInstance;
    141 }
    142 
    143119Value * KernelInterface::createDoBlockCall(Value * self) {
    144120    Module * m = iBuilder->getModule();
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5141 r5174  
    77#define KERNEL_INTERFACE_H
    88
    9 
    109#include <string>
    1110#include <vector>
     
    1312#include <IDISA/idisa_builder.h>
    1413#include "streamset.h"
     14
    1515
    1616struct ScalarBinding {
     
    3333
    3434public:
     35    /*
     36     
     37     This class defines the methods to be used to generate the code 
     38     necessary for declaring, allocating, calling and synchronizing
     39     kernels.   The methods to be used for constructing kernels are defined
     40     within the KernelBuilder class of kernel.h
     41     
     42     */
     43   
     44    std::string & getName() { return mKernelName;}
     45   
     46    std::vector<StreamSetBinding> getStreamInputs() {return mStreamSetInputs;}
     47    std::vector<StreamSetBinding> getStreamOutputs() {return mStreamSetOutputs;}
     48    std::vector<ScalarBinding> getScalarInputs() { return mScalarInputs;}
     49    std::vector<ScalarBinding> getScalarOutputs() { return mScalarOutputs;}
     50   
     51   
     52    // Add ExternalLinkage method declarations for the kernel to a given client module.
     53    void addKernelDeclarations(Module * client);
     54   
     55    virtual llvm::Value * createInstance(std::vector<llvm::Value *> initialParameters);
     56
     57    llvm::Value * createDoSegmentCall(llvm::Value * kernelInstance, llvm::Value * blkCount);
     58    llvm::Value * createFinalBlockCall(llvm::Value * kernelInstance, llvm::Value * remainingBytes);
     59    llvm::Value * createGetAccumulatorCall(llvm::Value * kernelInstance, std::string accumName);
     60   
     61    unsigned getLookAhead() { return mLookAheadPositions; }
     62   
     63   
     64    virtual llvm::Value * getLogicalSegmentNo(llvm::Value * kernelInstance) = 0;
     65    virtual llvm::Value * getProcessedItemCount(llvm::Value * kernelInstance) = 0;
     66    virtual llvm::Value * getProducedItemCount(llvm::Value * kernelInstance) = 0;
     67    virtual llvm::Value * getTerminationSignal(llvm::Value * kernelInstance) = 0;
     68   
     69protected:
    3570    KernelInterface(IDISA::IDISA_Builder * builder,
    3671                    std::string kernelName,
     
    5085    mLookAheadPositions(0) {}
    5186   
    52     unsigned getLookAhead() { return mLookAheadPositions; }
    5387   
    54     // Add ExternalLinkage method declarations for the kernel to a given client module.
    55     void addKernelDeclarations(Module * client);
    56    
    57     virtual llvm::Value * createInstance(std::vector<llvm::Value *> initialParameters);
    58     llvm::Value * createInstance(std::vector<llvm::Value *> initialParameters, std::vector<parabix::StreamSetBuffer *> inputs, std::vector<parabix::StreamSetBuffer *> outputBuffers);
    59     llvm::Value * createDoBlockCall(llvm::Value * kernelInstance);
    60     llvm::Value * createDoSegmentCall(llvm::Value * kernelInstance, llvm::Value * blkCount);
    61     llvm::Value * createFinalBlockCall(llvm::Value * kernelInstance, llvm::Value * remainingBytes);
    62     llvm::Value * createGetAccumulatorCall(llvm::Value * kernelInstance, std::string accumName);
    63    
    64    
    65 protected:
    6688   
    6789    IDISA::IDISA_Builder * iBuilder;
     
    7698   
    7799    void setLookAhead(unsigned lookAheadPositions) {mLookAheadPositions = lookAheadPositions;}
     100    llvm::Value * createDoBlockCall(llvm::Value * kernelInstance);
    78101
    79102};
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5171 r5174  
    1010#include <llvm/Support/raw_ostream.h>
    1111#include <llvm/IR/TypeBuilder.h>
     12#include <llvm/Support/ErrorHandling.h>
    1213#include <toolchain.h>
    1314
     
    2627void KernelBuilder::addScalar(Type * t, std::string scalarName) {
    2728    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    28         throw std::runtime_error("Illegal addition of kernel field after kernel state finalized: " + scalarName);
     29        llvm::report_fatal_error("Illegal addition of kernel field after kernel state finalized: " + scalarName);
    2930    }
    3031    unsigned index = mKernelFields.size();
     
    3637    unsigned blockSize = iBuilder->getBitBlockWidth();
    3738    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
    38         throw std::runtime_error("Kernel preparation: Incorrect number of input buffers");
     39        llvm::report_fatal_error("Kernel preparation: Incorrect number of input buffers");
    3940    }
    4041    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
    41         throw std::runtime_error("Kernel preparation: Incorrect number of output buffers");
     42        llvm::report_fatal_error("Kernel preparation: Incorrect number of output buffers");
    4243    }
    4344    addScalar(iBuilder->getSizeTy(), blockNoScalar);
     45    addScalar(iBuilder->getSizeTy(), logicalSegmentNoScalar);
     46    addScalar(iBuilder->getSizeTy(), processedItemCount);
     47    addScalar(iBuilder->getSizeTy(), producedItemCount);
     48    addScalar(iBuilder->getInt1Ty(), terminationSignal);
    4449    int streamSetNo = 0;
    4550    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    4651        size_t bufferSize = mStreamSetInputBuffers[i]->getBufferSize() * blockSize;
    4752        if (!(mStreamSetInputBuffers[i]->getBufferStreamSetType() == mStreamSetInputs[i].ssType)) {
    48              throw std::runtime_error("Kernel preparation: Incorrect input buffer type");
     53             llvm::report_fatal_error("Kernel preparation: Incorrect input buffer type");
    4954        }
    50         if ((bufferSize > 0) && (bufferSize < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
     55        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
    5156             errs() << "buffer size = " << mStreamSetInputBuffers[i]->getBufferSize() << "\n";
    52              throw std::runtime_error("Kernel preparation: Buffer size too small.");
     57             llvm::report_fatal_error("Kernel preparation: Buffer size too small.");
    5358        }
    54 
    5559        mScalarInputs.push_back(ScalarBinding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].ssName + basePtrSuffix});
    5660        mStreamSetNameMap.emplace(mStreamSetInputs[i].ssName, streamSetNo);
     
    5963    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    6064        if (!(mStreamSetOutputBuffers[i]->getBufferStreamSetType() == mStreamSetOutputs[i].ssType)) {
    61              throw std::runtime_error("Kernel preparation: Incorrect output buffer type");
     65             llvm::report_fatal_error("Kernel preparation: Incorrect output buffer type");
    6266        }
    6367        mScalarInputs.push_back(ScalarBinding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].ssName + basePtrSuffix});
     
    146150}
    147151
    148 //  The default doSegment method simply dispatches to the doBlock routine.
     152void KernelBuilder::generateDoBlockLogic(Value * self, Value * blockNo) {
     153    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
     154    iBuilder->CreateCall(doBlockFunction, {self});
     155}
     156
     157//  The default doSegment method dispatches to the doBlock routine for
     158//  each block of the given number of blocksToDo, and then updates counts.
    149159void KernelBuilder::generateDoSegmentMethod() {
    150160    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
    151161    Module * m = iBuilder->getModule();
    152     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    153162    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
    154163    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
     
    158167    BasicBlock * blocksDone = BasicBlock::Create(iBuilder->getContext(), "blocksDone", doSegmentFunction, 0);
    159168    Type * const size_ty = iBuilder->getSizeTy();
     169    Value * stride = ConstantInt::get(size_ty, iBuilder->getStride());
     170    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
    160171   
    161172    Function::arg_iterator args = doSegmentFunction->arg_begin();
    162173    Value * self = &*(args++);
    163174    Value * blocksToDo = &*(args);
    164    
     175
     176    Value * segmentNo = getLogicalSegmentNo(self);
    165177    iBuilder->CreateBr(blockLoopCond);
    166178
     
    173185    iBuilder->SetInsertPoint(blockLoopBody);
    174186    Value * blockNo = getScalarField(self, blockNoScalar);   
    175     iBuilder->CreateCall(doBlockFunction, {self});
    176     setBlockNo(self, iBuilder->CreateAdd(blockNo, ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth())));
    177     blocksRemaining->addIncoming(iBuilder->CreateSub(blocksRemaining, ConstantInt::get(size_ty, 1)), blockLoopBody);
     187    generateDoBlockLogic(self, blockNo);
     188    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
     189    blocksRemaining->addIncoming(iBuilder->CreateSub(blocksRemaining, strideBlocks), blockLoopBody);
    178190    iBuilder->CreateBr(blockLoopCond);
    179191   
    180192    iBuilder->SetInsertPoint(blocksDone);
     193    setProcessedItemCount(self, iBuilder->CreateAdd(getProcessedItemCount(self), iBuilder->CreateMul(blocksToDo, stride)));
     194    // Must be the last action, for synchronization.
     195    setLogicalSegmentNo(self, iBuilder->CreateAdd(segmentNo, ConstantInt::get(size_ty, 1)));
     196
    181197    iBuilder->CreateRetVoid();
    182198    iBuilder->restoreIP(savePoint);
     
    186202    const auto f = mInternalStateNameMap.find(fieldName);
    187203    if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
    188         throw std::runtime_error("Kernel does not contain internal state: " + fieldName);
     204        llvm::report_fatal_error("Kernel does not contain internal state: " + fieldName);
    189205    }
    190206    return iBuilder->getInt32(f->second);
     
    203219}
    204220
     221Value * KernelBuilder::getLogicalSegmentNo(Value * self) {
     222    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
     223    LoadInst * segNo = iBuilder->CreateAlignedLoad(ptr, sizeof(size_t));
     224    segNo->setOrdering(Acquire);
     225    return segNo;
     226}
     227
     228Value * KernelBuilder::getProcessedItemCount(Value * self) {
     229    return getScalarField(self, processedItemCount);
     230}
     231
     232Value * KernelBuilder::getProducedItemCount(Value * self) {
     233    return getScalarField(self, producedItemCount);
     234}
     235
     236//  By default, kernels do not terminate early. 
     237Value * KernelBuilder::getTerminationSignal(Value * self) {
     238    return ConstantInt::getNullValue(iBuilder->getInt1Ty());
     239}
     240
     241
     242void KernelBuilder::setLogicalSegmentNo(Value * self, Value * newCount) {
     243    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
     244    iBuilder->CreateAlignedStore(newCount, ptr, sizeof(size_t))->setOrdering(Release);
     245}
     246
     247void KernelBuilder::setProcessedItemCount(Value * self, Value * newCount) {
     248    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(processedItemCount)});
     249    iBuilder->CreateStore(newCount, ptr);
     250}
     251
     252void KernelBuilder::setProducedItemCount(Value * self, Value * newCount) {
     253    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(producedItemCount)});
     254    iBuilder->CreateStore(newCount, ptr);
     255}
     256
     257void KernelBuilder::setTerminationSignal(Value * self, Value * newFieldVal) {
     258    llvm::report_fatal_error("This kernel type does not support setTerminationSignal.");
     259}
     260
     261
    205262Value * KernelBuilder::getBlockNo(Value * self) {
    206263    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
    207     LoadInst * blockNo = iBuilder->CreateAlignedLoad(ptr, 8);
    208     blockNo->setOrdering(Acquire);
     264    LoadInst * blockNo = iBuilder->CreateLoad(ptr);
    209265    return blockNo;
    210266}
     
    212268void KernelBuilder::setBlockNo(Value * self, Value * newFieldVal) {
    213269    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
    214     iBuilder->CreateAlignedStore(newFieldVal, ptr, 8)->setOrdering(Release);
     270    iBuilder->CreateStore(newFieldVal, ptr);
    215271}
    216272
     
    221277        if (arg->getName() == paramName) return arg;
    222278    }
    223     throw std::runtime_error("Method does not have parameter: " + paramName);
     279    llvm::report_fatal_error("Method does not have parameter: " + paramName);
    224280}
    225281
     
    227283    const auto f = mStreamSetNameMap.find(ssName);
    228284    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
    229         throw std::runtime_error("Kernel does not contain stream set: " + ssName);
     285        llvm::report_fatal_error("Kernel does not contain stream set: " + ssName);
    230286    }
    231287    return f->second;
     
    273329    Function * initMethod = m->getFunction(initFnName);
    274330    if (!initMethod) {
    275         throw std::runtime_error("Cannot find " + initFnName);
     331        llvm::report_fatal_error("Cannot find " + initFnName);
    276332    }
    277333    iBuilder->CreateCall(initMethod, init_args);
     
    306362        Value * basePtr = getStreamSetBasePtr(self, mStreamSetInputs[i].ssName);
    307363        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(basePtr));
    308         inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getComsumerPosPtr(basePtr));
     364        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(basePtr));
    309365        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->hasEndOfInputPtr(basePtr));
    310366    }
     
    312368        Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
    313369        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(basePtr));
    314         outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getComsumerPosPtr(basePtr));
     370        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(basePtr));
    315371    }
    316372
     
    339395    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
    340396    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    341         LoadInst * producerPos = iBuilder->CreateAlignedLoad(outbufProducerPtrs[i], 8);
     397        LoadInst * producerPos = iBuilder->CreateAlignedLoad(outbufProducerPtrs[i], sizeof(size_t));
    342398        producerPos->setOrdering(Acquire);
    343399        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
    344         LoadInst * consumerPos = iBuilder->CreateAlignedLoad(outbufConsumerPtrs[i], 8);
     400        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(outbufConsumerPtrs[i], sizeof(size_t));
    345401        consumerPos->setOrdering(Acquire);
    346402        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
     
    352408    iBuilder->SetInsertPoint(inputCheckBlock);
    353409
     410    Value * requiredSize = segSize;
     411    if (mLookAheadPositions > 0) {
     412        requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
     413    }
    354414    waitCondTest = ConstantInt::get(int1ty, 1);
    355415    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
    356         LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], 8);
     416        LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], sizeof(size_t));
    357417        producerPos->setOrdering(Acquire);
    358418        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
    359         LoadInst * consumerPos = iBuilder->CreateAlignedLoad(inbufConsumerPtrs[i], 8);
     419        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(inbufConsumerPtrs[i], sizeof(size_t));
    360420        consumerPos->setOrdering(Acquire);
    361421        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
    362         waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, segSize), producerPos));
     422        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
    363423    }
    364424
     
    367427    iBuilder->SetInsertPoint(endSignalCheckBlock);
    368428   
    369     LoadInst * endSignal = iBuilder->CreateAlignedLoad(endSignalPtrs[0], 8);
     429    LoadInst * endSignal = iBuilder->CreateAlignedLoad(endSignalPtrs[0], sizeof(size_t));
    370430    // iBuilder->CallPrintInt(name + ":endSignal", endSignal);
    371431    endSignal->setOrdering(Acquire);
    372432    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
    373         LoadInst * endSignal_next = iBuilder->CreateAlignedLoad(endSignalPtrs[i], 8);
     433        LoadInst * endSignal_next = iBuilder->CreateAlignedLoad(endSignalPtrs[i], sizeof(size_t));
    374434        endSignal_next->setOrdering(Acquire);
    375435        iBuilder->CreateAnd(endSignal, endSignal_next);
     
    384444    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
    385445        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
    386         iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], 8)->setOrdering(Release);
    387     }
     446        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], sizeof(size_t))->setOrdering(Release);
     447    }
     448   
     449    Value * produced = getProducedItemCount(self);
    388450    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    389         Value * producerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(outbufProducerPtrs[i]), segSize);
    390         iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], 8)->setOrdering(Release);
    391     }
    392    
     451        iBuilder->CreateAlignedStore(produced, outbufProducerPtrs[i], sizeof(size_t))->setOrdering(Release);
     452    }
     453   
     454    Value * earlyEndSignal = getTerminationSignal(self);
     455    if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
     456        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
     457        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
     458
     459        iBuilder->SetInsertPoint(earlyEndBlock);
     460        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     461            Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
     462            mStreamSetOutputBuffers[i]->setEndOfInput(basePtr);
     463        }       
     464    }
    393465    iBuilder->CreateBr(outputCheckBlock);
    394466     
     
    415487    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
    416488        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
    417         iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], 8)->setOrdering(Release);
     489        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], sizeof(size_t))->setOrdering(Release);
    418490    }
    419491    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    420         Value * producerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(outbufProducerPtrs[i]), remainingBytes);
    421         iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], 8)->setOrdering(Release);
     492       
     493        Value * produced = iBuilder->CreateAdd(iBuilder->CreateLoad(outbufProducerPtrs[i]), remainingBytes);
     494        iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], sizeof(size_t))->setOrdering(Release);
    422495    }
    423496
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5165 r5174  
    1616
    1717const std::string blockNoScalar = "blockNo";
     18const std::string logicalSegmentNoScalar = "logicalSegNo";
     19const std::string processedItemCount = "processedItemCount";
     20const std::string producedItemCount = "producedItemCount";
     21const std::string terminationSignal = "terminationSignal";
    1822const std::string basePtrSuffix = "_basePtr";
    1923const std::string blkMaskSuffix = "_blkMask";
     
    4751
    4852    Value * getBlockNo(Value * self);
    49 
     53    virtual llvm::Value * getLogicalSegmentNo(llvm::Value * kernelInstance) override;
     54    virtual llvm::Value * getProcessedItemCount(llvm::Value * kernelInstance) override;
     55    virtual llvm::Value * getProducedItemCount(llvm::Value * kernelInstance) override;
     56    virtual llvm::Value * getTerminationSignal(llvm::Value * kernelInstance) override;
     57   
    5058   
    5159protected:
     
    6674    // doBlock calls.
    6775    virtual void generateDoBlockMethod() = 0;
    68    
     76    virtual void generateDoBlockLogic(Value * self, Value * blockNo);
     77
    6978    // Each kernel builder subtype must also specify the logic for processing the
    7079    // final block of stream data, if there is any special processing required
     
    106115
    107116    void setBlockNo(Value * self, Value * newFieldVal);
    108        
     117    virtual void setLogicalSegmentNo(llvm::Value * self, Value * newFieldVal);
     118    virtual void setProcessedItemCount(llvm::Value * self, Value * newFieldVal);
     119    virtual void setProducedItemCount(llvm::Value * self, Value * newFieldVal);
     120    virtual void setTerminationSignal(llvm::Value * self, Value * newFieldVal);
     121   
     122   
    109123protected:
    110124
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5165 r5174  
    5353
    5454    iBuilder->SetInsertPoint(entryBlock);
    55 
    5655    Value * sharedStruct = iBuilder->CreateBitCast(input, PointerType::get(sharedStructType, 0));
    5756    Value * myThreadId = ConstantInt::get(size_ty, id);
     
    6665    int segmentSize = codegen::SegmentSize;
    6766    Constant * segmentBlocks = ConstantInt::get(size_ty, segmentSize);
    68     Constant * hypersegmentBlocks = ConstantInt::get(size_ty, segmentSize * threadNum);
    6967    Constant * segmentBytes = ConstantInt::get(size_ty, iBuilder->getStride() * segmentSize);
    7068    Constant * hypersegmentBytes = ConstantInt::get(size_ty, iBuilder->getStride() * segmentSize * threadNum);
    7169    Constant * const blockSize = ConstantInt::get(size_ty, iBuilder->getStride());
    7270
     71    Value * myFirstSegNo = myThreadId;  //
    7372    // The offset of my starting segment within the thread group hypersegment.
    74     Value * myBlockNo = iBuilder->CreateMul(segmentBlocks, myThreadId);
    7573    Value * myOffset = iBuilder->CreateMul(segmentBytes, myThreadId);
    7674    Value * fullSegLimit = iBuilder->CreateAdd(myOffset, segmentBytes);
     
    8179    PHINode * remainingBytes = iBuilder->CreatePHI(size_ty, 2, "remainingBytes");
    8280    remainingBytes->addIncoming(fileSize, entryBlock);
    83     PHINode * blockNo = iBuilder->CreatePHI(size_ty, 2, "blockNo");
    84     blockNo->addIncoming(myBlockNo, entryBlock);
     81    PHINode * segNo = iBuilder->CreatePHI(size_ty, 2, "segNo");
     82    segNo->addIncoming(myFirstSegNo, entryBlock);
    8583
    8684    Value * LT_fullSegment = iBuilder->CreateICmpSLT(remainingBytes, fullSegLimit);
     
    8987    for (unsigned i = 0; i < kernels.size(); i++) {
    9088        iBuilder->SetInsertPoint(segmentWait[i]);
    91         Value * curBlockNo = kernels[i]->getBlockNo(instancePtrs[i]);
    92         Value * cond = iBuilder->CreateICmpEQ(curBlockNo, blockNo);
     89        Value * processedSegmentCount = kernels[i]->getLogicalSegmentNo(instancePtrs[i]);
     90        Value * cond = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
    9391        iBuilder->CreateCondBr(cond, segmentLoopBody[i], segmentWait[i]);
    9492
     
    10098   
    10199    remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, hypersegmentBytes), segmentLoopBody[kernels.size()-1]);
    102     blockNo->addIncoming(iBuilder->CreateAdd(blockNo, hypersegmentBlocks), segmentLoopBody[kernels.size()-1]);
     100    segNo->addIncoming(iBuilder->CreateAdd(segNo, ConstantInt::get(size_ty, threadNum)), segmentLoopBody[kernels.size()-1]);
    103101    iBuilder->CreateBr(segmentLoop);
    104102
     
    114112    for (unsigned i = 0; i < kernels.size(); i++) {
    115113        iBuilder->SetInsertPoint(partialSegmentWait[i]);
    116         Value * curBlockNo = kernels[i]->getBlockNo(instancePtrs[i]);
    117         Value * cond = iBuilder->CreateICmpEQ(curBlockNo, blockNo);
     114        Value * processedSegmentCount = kernels[i]->getLogicalSegmentNo(instancePtrs[i]);
     115        Value * cond = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
    118116        iBuilder->CreateCondBr(cond, partialSegmentLoopBody[i], partialSegmentWait[i]);
    119117
     
    255253    BasicBlock * fullBodyBlock = BasicBlock::Create(iBuilder->getContext(), "fullBody", main, 0);
    256254    BasicBlock * finalBlock = BasicBlock::Create(iBuilder->getContext(), "final", main, 0);
     255    BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exit", main, 0);
    257256   
    258257   
     
    316315        kernels[i]->createFinalBlockCall(instances[i], remainingBytes);
    317316    }
     317    iBuilder->CreateBr(exitBlock);
     318    iBuilder->SetInsertPoint(exitBlock);
     319
    318320}
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5106 r5174  
    157157
    158158   
    159 void s2pKernel::generateDoBlockMethod() {
    160     IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
    161     Module * m = iBuilder->getModule();
    162 
    163     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    164    
    165     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    166    
    167     Value * self = getParameter(doBlockFunction, "self");
    168     Value * blockNo = getScalarField(self, blockNoScalar);
     159void s2pKernel::generateDoBlockLogic(Value * self, Value * blockNo) {
    169160    Value * byteStreamBlock_ptr = getStreamSetBlockPtr(self, "byteStream", blockNo);
    170161    Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
     
    179170        iBuilder->CreateBlockAlignedStore(p_bitblock[j], basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
    180171    }
     172}
     173   
     174void s2pKernel::generateDoBlockMethod() {
     175    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     176    Module * m = iBuilder->getModule();
     177   
     178    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     179   
     180    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     181   
     182    Value * self = getParameter(doBlockFunction, "self");
     183    Value * blockNo = getScalarField(self, blockNoScalar);
     184   
     185    generateDoBlockLogic(self, blockNo);
    181186    iBuilder->CreateRetVoid();
    182187    iBuilder->restoreIP(savePoint);
    183188}
    184 
    185    
    186189}
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r5133 r5174  
    3333   
    3434private:
     35    void generateDoBlockLogic(Value * self, Value * blockNo) override;
    3536    void generateDoBlockMethod() override;
    3637    void generateFinalBlockMethod() override;
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5142 r5174  
    3636}
    3737
    38 llvm::Value * StreamSetBuffer::getComsumerPosPtr(Value * ptr) {
     38llvm::Value * StreamSetBuffer::getConsumerPosPtr(Value * ptr) {
    3939    return iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)});
    4040}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5142 r5174  
    5151    virtual void setProducerPos(Value * ptr, Value * pos);
    5252
    53     virtual llvm::Value * getComsumerPosPtr(Value * ptr);
     53    virtual llvm::Value * getConsumerPosPtr(Value * ptr);
    5454
    5555    virtual void setConsumerPos(Value * ptr, Value * pos);
Note: See TracChangeset for help on using the changeset viewer.