Ignore:
Timestamp:
Aug 29, 2016, 1:53:08 PM (3 years ago)
Author:
lindanl
Message:

Add pipeline parallel strategy to the framework.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5133 r5135  
    99#include <llvm/IR/Value.h>
    1010#include <llvm/Support/raw_ostream.h>
     11#include <llvm/IR/TypeBuilder.h>
     12#include <toolchain.h>
    1113
    1214using namespace llvm;
     
    4446             throw std::runtime_error("Kernel preparation: Incorrect input buffer type");
    4547        }
    46         mScalarInputs.push_back(ScalarBinding{mStreamSetInputBuffers[i]->getStreamBufferPointerType(), mStreamSetInputs[i].ssName + basePtrSuffix});
     48        mScalarInputs.push_back(ScalarBinding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].ssName + basePtrSuffix});
    4749        mStreamSetNameMap.emplace(mStreamSetInputs[i].ssName, streamSetNo);
    4850        streamSetNo++;
     
    5254             throw std::runtime_error("Kernel preparation: Incorrect input buffer type");
    5355        }
    54         mScalarInputs.push_back(ScalarBinding{mStreamSetOutputBuffers[i]->getStreamBufferPointerType(), mStreamSetOutputs[i].ssName + basePtrSuffix});
     56        mScalarInputs.push_back(ScalarBinding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].ssName + basePtrSuffix});
    5557        mStreamSetNameMap.emplace(mStreamSetOutputs[i].ssName, streamSetNo);
    5658        streamSetNo++;
     
    243245    }
    244246    for (auto b : mStreamSetInputBuffers) {
    245         init_args.push_back(b->getStreamSetBasePtr());
     247        init_args.push_back(b->getStreamSetStructPtr());
    246248    }
    247249    for (auto b : mStreamSetOutputBuffers) {
    248         init_args.push_back(b->getStreamSetBasePtr());
     250        init_args.push_back(b->getStreamSetStructPtr());
    249251    }
    250252    std::string initFnName = mKernelName + init_suffix;
     
    257259}
    258260
    259 
    260 
    261 
     261Function * KernelBuilder::generateThreadFunction(std::string name){
     262    Module * m = iBuilder->getModule();
     263    Type * const voidTy = Type::getVoidTy(m->getContext());
     264    Type * const voidPtrTy = TypeBuilder<void *, false>::get(m->getContext());
     265    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
     266    Type * const int1ty = iBuilder->getInt1Ty();
     267
     268    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
     269    threadFunc->setCallingConv(CallingConv::C);
     270    Function::arg_iterator args = threadFunc->arg_begin();
     271
     272    Value * const arg = &*(args++);
     273    arg->setName("args");
     274
     275    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
     276
     277    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
     278
     279    std::vector<Value *> inbufProducerPtrs;
     280    std::vector<Value *> inbufConsumerPtrs;
     281    std::vector<Value *> outbufProducerPtrs;
     282    std::vector<Value *> outbufConsumerPtrs;   
     283    std::vector<Value *> endSignalPtrs;
     284
     285    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     286        Value * basePtr = getStreamSetBasePtr(self, mStreamSetInputs[i].ssName);
     287        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(basePtr));
     288        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getComsumerPosPtr(basePtr));
     289        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->hasEndOfInputPtr(basePtr));
     290    }
     291    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     292        Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
     293        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(basePtr));
     294        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getComsumerPosPtr(basePtr));
     295    }
     296
     297    const unsigned segmentBlocks = codegen::SegmentSize;
     298    const unsigned bufferSegments = codegen::BufferSegments;
     299    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
     300    Type * const size_ty = iBuilder->getSizeTy();
     301
     302    Value * segSize = ConstantInt::get(size_ty, segmentSize);
     303    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
     304    Value * segBlocks = ConstantInt::get(size_ty, segmentBlocks);
     305   
     306    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
     307    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
     308   
     309    BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
     310    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
     311    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
     312    BasicBlock * doFinalSegBlock = BasicBlock::Create(iBuilder->getContext(), "doFinalSeg", threadFunc, 0);
     313    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), "doFinal", threadFunc, 0);
     314
     315    iBuilder->CreateBr(outputCheckBlock);
     316
     317    iBuilder->SetInsertPoint(outputCheckBlock);
     318
     319    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
     320    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
     321        LoadInst * producerPos = iBuilder->CreateAlignedLoad(outbufProducerPtrs[i], 8);
     322        producerPos->setOrdering(Acquire);
     323        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
     324        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(outbufConsumerPtrs[i], 8);
     325        consumerPos->setOrdering(Acquire);
     326        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
     327        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
     328    }
     329   
     330    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock);
     331
     332    iBuilder->SetInsertPoint(inputCheckBlock);
     333
     334    waitCondTest = ConstantInt::get(int1ty, 1);
     335    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
     336        LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], 8);
     337        producerPos->setOrdering(Acquire);
     338        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
     339        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(inbufConsumerPtrs[i], 8);
     340        consumerPos->setOrdering(Acquire);
     341        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
     342        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, segSize), producerPos));
     343    }
     344
     345    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
     346   
     347    iBuilder->SetInsertPoint(endSignalCheckBlock);
     348   
     349    LoadInst * endSignal = iBuilder->CreateAlignedLoad(endSignalPtrs[0], 8);
     350    // iBuilder->CallPrintInt(name + ":endSignal", endSignal);
     351    endSignal->setOrdering(Acquire);
     352    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
     353        LoadInst * endSignal_next = iBuilder->CreateAlignedLoad(endSignalPtrs[i], 8);
     354        endSignal_next->setOrdering(Acquire);
     355        iBuilder->CreateAnd(endSignal, endSignal_next);
     356    }
     357       
     358    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(endSignal, ConstantInt::get(iBuilder->getInt8Ty(), 1)), endBlock, inputCheckBlock);
     359   
     360    iBuilder->SetInsertPoint(doSegmentBlock);
     361 
     362    createDoSegmentCall(self, segBlocks);
     363
     364    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
     365        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
     366        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], 8)->setOrdering(Release);
     367    }
     368    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
     369        Value * producerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(outbufProducerPtrs[i]), segSize);
     370        iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], 8)->setOrdering(Release);
     371    }
     372   
     373    iBuilder->CreateBr(outputCheckBlock);
     374     
     375    iBuilder->SetInsertPoint(endBlock);
     376    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
     377    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
     378    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
     379    Value * blockSize = ConstantInt::get(size_ty, iBuilder->getBitBlockWidth());
     380    Value * blocks = iBuilder->CreateUDiv(remainingBytes, blockSize);
     381    Value * finalBlockRemainingBytes = iBuilder->CreateURem(remainingBytes, blockSize);
     382
     383    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(blocks, ConstantInt::get(size_ty, 0)), doFinalBlock, doFinalSegBlock);
     384
     385    iBuilder->SetInsertPoint(doFinalSegBlock);
     386
     387    createDoSegmentCall(self, blocks);
     388
     389    iBuilder->CreateBr(doFinalBlock);
     390
     391    iBuilder->SetInsertPoint(doFinalBlock);
     392
     393    createFinalBlockCall(self, finalBlockRemainingBytes);
     394
     395    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
     396        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
     397        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], 8)->setOrdering(Release);
     398    }
     399    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
     400        Value * producerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(outbufProducerPtrs[i]), remainingBytes);
     401        iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], 8)->setOrdering(Release);
     402    }
     403
     404    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     405        Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
     406        mStreamSetOutputBuffers[i]->setEndOfInput(basePtr);
     407    }
     408
     409    Value * nullVal = Constant::getNullValue(voidPtrTy);
     410    Function * pthreadExitFunc = m->getFunction("pthread_exit");
     411    CallInst * exitThread = iBuilder->CreateCall(pthreadExitFunc, {nullVal});
     412    exitThread->setDoesNotReturn();
     413    iBuilder->CreateRetVoid();
     414
     415    return threadFunc;
     416
     417}
     418
     419
Note: See TracChangeset for help on using the changeset viewer.