Changeset 5135


Ignore:
Timestamp:
Aug 29, 2016, 1:53:08 PM (3 years ago)
Author:
lindanl
Message:

Add pipeline parallel strategy to the framework.

Location:
icGREP/icgrep-devel/icgrep
Files:
10 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5134 r5135  
    2424#include <llvm/Support/Debug.h>
    2525#include <llvm/IR/Verifier.h>
     26#include <llvm/IR/TypeBuilder.h>
    2627#include <UCD/UnicodeNameData.h>
    2728
     
    7879static cl::opt<bool> ShowLineNumbers("n", cl::desc("Show the line number with each matching line."), cl::cat(bGrepOutputOptions));
    7980static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
     81
     82static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(bGrepOutputOptions));
    8083
    8184
     
    117120
    118121using namespace parabix;
    119 
     122/*
    120123void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, bool isNameExpression) {
    121124    isUTF_16 = UTF_16;
     
    213216
    214217}
    215 
    216 
     218*/
     219void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, bool isNameExpression) {
     220    isUTF_16 = UTF_16;
     221    Module * M = new Module(moduleName, getGlobalContext());
     222   
     223    IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);
     224
     225    const unsigned segmentSize = codegen::SegmentSize;
     226    const unsigned bufferSegments = codegen::BufferSegments;
     227
     228    Encoding::Type type;
     229    type = UTF_16 ? Encoding::Type::UTF_16 : Encoding::Type::UTF_8;
     230    unsigned bits;
     231    bits = UTF_16 ? 16 : 8;
     232
     233    Encoding encoding(type, bits);
     234    mIsNameExpression = isNameExpression;
     235
     236    Type * const int64ty = iBuilder->getInt64Ty();
     237    Type * const int32ty = iBuilder->getInt32Ty();
     238    Type * const size_ty = iBuilder->getSizeTy();
     239    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
     240    Type * const voidTy = Type::getVoidTy(M->getContext());   
     241    Type * const voidPtrTy = TypeBuilder<void *, false>::get(M->getContext());
     242    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), (UTF_16 ? 16 : 8)), 1), 0);
     243    Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
     244    Function * const mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
     245    mainFn->setCallingConv(CallingConv::C);
     246    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     247    Function::arg_iterator args = mainFn->arg_begin();
     248   
     249    Value * const inputStream = &*(args++);
     250    inputStream->setName("input");
     251    Value * const fileSize = &*(args++);
     252    fileSize->setName("fileSize");
     253    Value * const fileIdx = &*(args++);
     254    fileIdx->setName("fileIdx");
     255       
     256    ExternalUnboundedBuffer ByteStream(iBuilder, StreamSetType(1, i8));
     257    CircularBuffer BasisBits(iBuilder, StreamSetType(8, i1), segmentSize * bufferSegments);
     258
     259    kernel::s2pKernel  s2pk(iBuilder);
     260    s2pk.generateKernel({&ByteStream}, {&BasisBits});
     261
     262    re_ast = re::regular_expression_passes(encoding, re_ast);   
     263    pablo::PabloFunction * function = re::re2pablo_compiler(encoding, re_ast, CountOnly);
     264    pablo_function_passes(function);
     265
     266    ByteStream.setStreamSetBuffer(inputStream);
     267    BasisBits.allocateBuffer();
     268
     269    Value * producerPtr = ByteStream.getProducerPosPtr(ByteStream.getStreamSetStructPtr());
     270    iBuilder->CreateAlignedStore(fileSize, producerPtr, 8)->setOrdering(Release);
     271
     272    Value * s2pInstance = s2pk.createInstance({});
     273 
     274    Type * pthreadTy = int64ty; //Pthread Type for 64-bit machine.
     275    FunctionType * funVoidPtrVoidTy = FunctionType::get(voidTy, int8PtrTy, false);   
     276   
     277    Function * pthreadCreateFunc = cast<Function>(M->getOrInsertFunction("pthread_create",
     278                                        int32ty,
     279                                        pthreadTy->getPointerTo(),
     280                                        voidPtrTy,
     281                                        static_cast<Type *>(funVoidPtrVoidTy)->getPointerTo(),
     282                                        voidPtrTy, nullptr));
     283    pthreadCreateFunc->setCallingConv(llvm::CallingConv::C);
     284    Function * pthreadJoinFunc = cast<Function>(M->getOrInsertFunction("pthread_join",
     285                                        int32ty,
     286                                        pthreadTy,
     287                                        PointerType::get(int8PtrTy, 0), nullptr));
     288    pthreadJoinFunc->setCallingConv(llvm::CallingConv::C);
     289
     290    Function * pthreadExitFunc = cast<Function>(M->getOrInsertFunction("pthread_exit",
     291                                        voidTy,
     292                                        voidPtrTy, nullptr));
     293    pthreadExitFunc->addFnAttr(llvm::Attribute::NoReturn);
     294    pthreadExitFunc->setCallingConv(llvm::CallingConv::C);
     295   
     296    // Type * const pthreadsTy = ArrayType::get(pthreadTy, 3);
     297    // AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
     298    // Value * pthreadsPtr1 = iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     299    // Value * pthreadsPtr2 = iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     300    // Value * pthreadsPtr3 = iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
     301    // Value * nullVal = Constant::getNullValue(voidPtrTy);
     302    // AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
     303
     304    if (CountOnly) {
     305        pablo::PabloKernel  icgrepK(iBuilder, "icgrep", function, {"matchedLineCount"});
     306        icgrepK.generateKernel({&BasisBits}, {});       
     307        Value * icgrepInstance = icgrepK.createInstance({});
     308
     309        if (pipelineParallel){
     310            // const auto ip = iBuilder->saveIP();
     311            // Function * s2p_func = s2pk.generateThreadFunction("s2p");
     312            // Function * icgrep_func = icgrepK.generateThreadFunction("icgrep");
     313            // iBuilder->restoreIP(ip);
     314
     315            // iBuilder->CreateCall(pthreadCreateFunc, std::vector<Value *>({pthreadsPtr1, nullVal, s2p_func, iBuilder->CreateBitCast(s2pInstance, int8PtrTy)}));
     316            // iBuilder->CreateCall(pthreadCreateFunc, std::vector<Value *>({pthreadsPtr2, nullVal, icgrep_func, iBuilder->CreateBitCast(icgrepInstance, int8PtrTy)}));
     317         
     318            // Value * threadID1 = iBuilder->CreateLoad(pthreadsPtr1);
     319            // Value * threadID2 = iBuilder->CreateLoad(pthreadsPtr2);
     320
     321            // iBuilder->CreateCall(pthreadJoinFunc, std::vector<Value *>({threadID1, status}));
     322            // iBuilder->CreateCall(pthreadJoinFunc, std::vector<Value *>({threadID2, status}));
     323            generatePipelineParallel(iBuilder, {&s2pk, &icgrepK}, {s2pInstance, icgrepInstance});
     324
     325        }
     326        else{
     327            generatePipelineLoop(iBuilder, {&s2pk, &icgrepK}, {s2pInstance, icgrepInstance}, fileSize);
     328
     329        }
     330       
     331        Value * matchCount = icgrepK.createGetAccumulatorCall(icgrepInstance, "matchedLineCount");
     332        iBuilder->CreateRet(matchCount);
     333
     334    }
     335    else {
     336        CircularBuffer MatchResults(iBuilder, StreamSetType(2, i1), segmentSize * bufferSegments);
     337        MatchResults.allocateBuffer();
     338
     339        pablo::PabloKernel  icgrepK(iBuilder, "icgrep", function, {});
     340        icgrepK.generateKernel({&BasisBits},  {&MatchResults});
     341        Value * icgrepInstance = icgrepK.createInstance({});
     342
     343        kernel::scanMatchKernel scanMatchK(iBuilder, mIsNameExpression);
     344        scanMatchK.generateKernel({&MatchResults}, {});               
     345        Value * scanMatchInstance = scanMatchK.createInstance({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
     346
     347        if (pipelineParallel){
     348            generatePipelineParallel(iBuilder, {&s2pk, &icgrepK, &scanMatchK}, {s2pInstance, icgrepInstance, scanMatchInstance});
     349
     350        }
     351        else{
     352            generatePipelineLoop(iBuilder, {&s2pk, &icgrepK, &scanMatchK}, {s2pInstance, icgrepInstance, scanMatchInstance}, fileSize);
     353        }
     354        // const auto ip = iBuilder->saveIP();
     355        // Function * s2p_func = s2pk.generateThreadFunction("s2p");
     356        // Function * icgrep_func = icgrepK.generateThreadFunction("icgrep");   
     357        // Function * scan_func = scanMatchK.generateThreadFunction("scanMatch");
     358        // iBuilder->restoreIP(ip);
     359       
     360        // iBuilder->CreateCall(pthreadCreateFunc, std::vector<Value *>({pthreadsPtr1, nullVal, s2p_func, iBuilder->CreateBitCast(s2pInstance, int8PtrTy)}));
     361        // iBuilder->CreateCall(pthreadCreateFunc, std::vector<Value *>({pthreadsPtr2, nullVal, icgrep_func, iBuilder->CreateBitCast(icgrepInstance, int8PtrTy)}));
     362        // iBuilder->CreateCall(pthreadCreateFunc, std::vector<Value *>({pthreadsPtr3, nullVal, scan_func, iBuilder->CreateBitCast(scanMatchInstance, int8PtrTy)}));
     363
     364        // Value * threadID1 = iBuilder->CreateLoad(pthreadsPtr1);
     365        // Value * threadID2 = iBuilder->CreateLoad(pthreadsPtr2);
     366        // Value * threadID3 = iBuilder->CreateLoad(pthreadsPtr3);
     367     
     368        // iBuilder->CreateCall(pthreadJoinFunc, std::vector<Value *>({threadID1, status}));
     369        // iBuilder->CreateCall(pthreadJoinFunc, std::vector<Value *>({threadID2, status}));
     370        // iBuilder->CreateCall(pthreadJoinFunc, std::vector<Value *>({threadID3, status}));
     371        iBuilder->CreateRetVoid();
     372
     373    }
     374     
     375    mEngine = JIT_to_ExecutionEngine(M);
     376    ApplyObjectCache(mEngine);
     377    icgrep_Linking(M, mEngine);
     378
     379#ifndef NDEBUG
     380    verifyModule(*M, &dbgs());
     381#endif
     382
     383    mEngine->finalizeObject();
     384    delete iBuilder;
     385   
     386    if (CountOnly) {
     387        mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
     388    } else {
     389        mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
     390    }
     391
     392}
    217393
    218394re::CC *  GrepEngine::grepCodepoints() {
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5111 r5135  
    127127    }
    128128    for (auto b : inputBuffers) {
    129         init_args.push_back(b->getStreamSetBasePtr());
     129        init_args.push_back(b->getStreamSetStructPtr());
    130130    }
    131131    for (auto b : outputBuffers) {
    132         init_args.push_back(b->getStreamSetBasePtr());
     132        init_args.push_back(b->getStreamSetStructPtr());
    133133    }
    134134    std::string initFnName = mKernelName + init_suffix;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5133 r5135  
    99#include <llvm/IR/Value.h>
    1010#include <llvm/Support/raw_ostream.h>
     11#include <llvm/IR/TypeBuilder.h>
     12#include <toolchain.h>
    1113
    1214using namespace llvm;
     
    4446             throw std::runtime_error("Kernel preparation: Incorrect input buffer type");
    4547        }
    46         mScalarInputs.push_back(ScalarBinding{mStreamSetInputBuffers[i]->getStreamBufferPointerType(), mStreamSetInputs[i].ssName + basePtrSuffix});
     48        mScalarInputs.push_back(ScalarBinding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].ssName + basePtrSuffix});
    4749        mStreamSetNameMap.emplace(mStreamSetInputs[i].ssName, streamSetNo);
    4850        streamSetNo++;
     
    5254             throw std::runtime_error("Kernel preparation: Incorrect input buffer type");
    5355        }
    54         mScalarInputs.push_back(ScalarBinding{mStreamSetOutputBuffers[i]->getStreamBufferPointerType(), mStreamSetOutputs[i].ssName + basePtrSuffix});
     56        mScalarInputs.push_back(ScalarBinding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].ssName + basePtrSuffix});
    5557        mStreamSetNameMap.emplace(mStreamSetOutputs[i].ssName, streamSetNo);
    5658        streamSetNo++;
     
    243245    }
    244246    for (auto b : mStreamSetInputBuffers) {
    245         init_args.push_back(b->getStreamSetBasePtr());
     247        init_args.push_back(b->getStreamSetStructPtr());
    246248    }
    247249    for (auto b : mStreamSetOutputBuffers) {
    248         init_args.push_back(b->getStreamSetBasePtr());
     250        init_args.push_back(b->getStreamSetStructPtr());
    249251    }
    250252    std::string initFnName = mKernelName + init_suffix;
     
    257259}
    258260
    259 
    260 
    261 
     261Function * KernelBuilder::generateThreadFunction(std::string name){
     262    Module * m = iBuilder->getModule();
     263    Type * const voidTy = Type::getVoidTy(m->getContext());
     264    Type * const voidPtrTy = TypeBuilder<void *, false>::get(m->getContext());
     265    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
     266    Type * const int1ty = iBuilder->getInt1Ty();
     267
     268    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
     269    threadFunc->setCallingConv(CallingConv::C);
     270    Function::arg_iterator args = threadFunc->arg_begin();
     271
     272    Value * const arg = &*(args++);
     273    arg->setName("args");
     274
     275    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
     276
     277    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
     278
     279    std::vector<Value *> inbufProducerPtrs;
     280    std::vector<Value *> inbufConsumerPtrs;
     281    std::vector<Value *> outbufProducerPtrs;
     282    std::vector<Value *> outbufConsumerPtrs;   
     283    std::vector<Value *> endSignalPtrs;
     284
     285    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     286        Value * basePtr = getStreamSetBasePtr(self, mStreamSetInputs[i].ssName);
     287        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(basePtr));
     288        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getComsumerPosPtr(basePtr));
     289        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->hasEndOfInputPtr(basePtr));
     290    }
     291    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     292        Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
     293        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(basePtr));
     294        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getComsumerPosPtr(basePtr));
     295    }
     296
     297    const unsigned segmentBlocks = codegen::SegmentSize;
     298    const unsigned bufferSegments = codegen::BufferSegments;
     299    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
     300    Type * const size_ty = iBuilder->getSizeTy();
     301
     302    Value * segSize = ConstantInt::get(size_ty, segmentSize);
     303    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
     304    Value * segBlocks = ConstantInt::get(size_ty, segmentBlocks);
     305   
     306    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
     307    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
     308   
     309    BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
     310    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
     311    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
     312    BasicBlock * doFinalSegBlock = BasicBlock::Create(iBuilder->getContext(), "doFinalSeg", threadFunc, 0);
     313    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), "doFinal", threadFunc, 0);
     314
     315    iBuilder->CreateBr(outputCheckBlock);
     316
     317    iBuilder->SetInsertPoint(outputCheckBlock);
     318
     319    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
     320    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
     321        LoadInst * producerPos = iBuilder->CreateAlignedLoad(outbufProducerPtrs[i], 8);
     322        producerPos->setOrdering(Acquire);
     323        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
     324        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(outbufConsumerPtrs[i], 8);
     325        consumerPos->setOrdering(Acquire);
     326        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
     327        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
     328    }
     329   
     330    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock);
     331
     332    iBuilder->SetInsertPoint(inputCheckBlock);
     333
     334    waitCondTest = ConstantInt::get(int1ty, 1);
     335    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
     336        LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], 8);
     337        producerPos->setOrdering(Acquire);
     338        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
     339        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(inbufConsumerPtrs[i], 8);
     340        consumerPos->setOrdering(Acquire);
     341        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
     342        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, segSize), producerPos));
     343    }
     344
     345    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
     346   
     347    iBuilder->SetInsertPoint(endSignalCheckBlock);
     348   
     349    LoadInst * endSignal = iBuilder->CreateAlignedLoad(endSignalPtrs[0], 8);
     350    // iBuilder->CallPrintInt(name + ":endSignal", endSignal);
     351    endSignal->setOrdering(Acquire);
     352    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
     353        LoadInst * endSignal_next = iBuilder->CreateAlignedLoad(endSignalPtrs[i], 8);
     354        endSignal_next->setOrdering(Acquire);
     355        iBuilder->CreateAnd(endSignal, endSignal_next);
     356    }
     357       
     358    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(endSignal, ConstantInt::get(iBuilder->getInt8Ty(), 1)), endBlock, inputCheckBlock);
     359   
     360    iBuilder->SetInsertPoint(doSegmentBlock);
     361 
     362    createDoSegmentCall(self, segBlocks);
     363
     364    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
     365        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
     366        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], 8)->setOrdering(Release);
     367    }
     368    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
     369        Value * producerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(outbufProducerPtrs[i]), segSize);
     370        iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], 8)->setOrdering(Release);
     371    }
     372   
     373    iBuilder->CreateBr(outputCheckBlock);
     374     
     375    iBuilder->SetInsertPoint(endBlock);
     376    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
     377    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
     378    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
     379    Value * blockSize = ConstantInt::get(size_ty, iBuilder->getBitBlockWidth());
     380    Value * blocks = iBuilder->CreateUDiv(remainingBytes, blockSize);
     381    Value * finalBlockRemainingBytes = iBuilder->CreateURem(remainingBytes, blockSize);
     382
     383    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(blocks, ConstantInt::get(size_ty, 0)), doFinalBlock, doFinalSegBlock);
     384
     385    iBuilder->SetInsertPoint(doFinalSegBlock);
     386
     387    createDoSegmentCall(self, blocks);
     388
     389    iBuilder->CreateBr(doFinalBlock);
     390
     391    iBuilder->SetInsertPoint(doFinalBlock);
     392
     393    createFinalBlockCall(self, finalBlockRemainingBytes);
     394
     395    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
     396        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
     397        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], 8)->setOrdering(Release);
     398    }
     399    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
     400        Value * producerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(outbufProducerPtrs[i]), remainingBytes);
     401        iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], 8)->setOrdering(Release);
     402    }
     403
     404    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     405        Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
     406        mStreamSetOutputBuffers[i]->setEndOfInput(basePtr);
     407    }
     408
     409    Value * nullVal = Constant::getNullValue(voidPtrTy);
     410    Function * pthreadExitFunc = m->getFunction("pthread_exit");
     411    CallInst * exitThread = iBuilder->CreateCall(pthreadExitFunc, {nullVal});
     412    exitThread->setDoesNotReturn();
     413    iBuilder->CreateRetVoid();
     414
     415    return threadFunc;
     416
     417}
     418
     419
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5133 r5135  
    4343   
    4444    llvm::Value * createInstance(std::vector<Value *> args) override;
     45
     46    Function * generateThreadFunction(std::string name);
    4547   
    4648protected:
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5126 r5135  
    1515#include <kernels/s2p_kernel.h>
    1616
     17#include <llvm/IR/TypeBuilder.h>
    1718
    1819using namespace kernel;
     20
     21void generatePipelineParallel(IDISA::IDISA_Builder * iBuilder, std::vector<KernelBuilder *> kernels, std::vector<Value *> instances) {
     22 
     23    Module * m = iBuilder->getModule();
     24
     25    Type * pthreadTy = iBuilder->getInt64Ty(); //Pthread Type for 64-bit machine.     
     26    Type * const voidPtrTy = TypeBuilder<void *, false>::get(m->getContext());
     27    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
     28
     29    Type * const pthreadsTy = ArrayType::get(pthreadTy, kernels.size());
     30    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
     31    std::vector<Value *> pthreadsPtrs;
     32    for (unsigned i = 0; i < kernels.size(); i++) {
     33        pthreadsPtrs.push_back(iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(i)}));
     34    }
     35    Value * nullVal = Constant::getNullValue(voidPtrTy);
     36    AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
     37
     38    std::vector<Function *> kernel_functions;
     39    const auto ip = iBuilder->saveIP();
     40    for (unsigned i = 0; i < kernels.size(); i++) {
     41        kernel_functions.push_back(kernels[i]->generateThreadFunction("k_"+std::to_string(i)));
     42    }
     43    iBuilder->restoreIP(ip);
     44
     45    Function * pthreadCreateFunc = m->getFunction("pthread_create");
     46    Function * pthreadJoinFunc = m->getFunction("pthread_join");
     47
     48    for (unsigned i = 0; i < kernels.size(); i++) {
     49        iBuilder->CreateCall(pthreadCreateFunc, std::vector<Value *>({pthreadsPtrs[i], nullVal, kernel_functions[i], iBuilder->CreateBitCast(instances[i], int8PtrTy)}));
     50    }
     51
     52    std::vector<Value *> threadIDs;
     53    for (unsigned i = 0; i < kernels.size(); i++) {
     54        threadIDs.push_back(iBuilder->CreateLoad(pthreadsPtrs[i]));
     55    }
     56   
     57    for (unsigned i = 0; i < kernels.size(); i++) {
     58        iBuilder->CreateCall(pthreadJoinFunc, std::vector<Value *>({threadIDs[i], status}));
     59    }
     60}
    1961
    2062
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.h

    r5086 r5135  
    1010#include <kernels/kernel.h>
    1111
     12
    1213void generatePipelineLoop(IDISA::IDISA_Builder * iBuilder, std::vector<kernel::KernelBuilder *> kernels, std::vector<llvm::Value *> instances, llvm::Value * totalBytes);
    1314
     15void generatePipelineParallel(IDISA::IDISA_Builder * iBuilder, std::vector<kernel::KernelBuilder *> kernels, std::vector<llvm::Value *> instances);
    1416
    1517#endif // PIPELINE_H
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5125 r5135  
    1313using namespace parabix;
    1414
     15enum SS_struct_index {iProducer_pos = 0, iConsumer_pos = 1, iEnd_of_input = 2, iBuffer_ptr = 3};
     16
    1517llvm::Type * StreamSetType::getStreamSetBlockType(IDISA::IDISA_Builder * iBuilder) {
    1618    llvm::Type * streamType = mFieldWidth == 1 ? iBuilder->getBitBlockType() : ArrayType::get(iBuilder->getBitBlockType(), mFieldWidth);
     
    2224}
    2325
     26llvm::PointerType * StreamSetBuffer::getStreamSetStructPointerType() {
     27    return PointerType::get(mStreamSetStructType, 0);
     28}
     29
     30llvm::Value * StreamSetBuffer::getProducerPosPtr(Value * ptr) {
     31    return iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)});
     32}
     33
     34void StreamSetBuffer::setProducerPos(Value * ptr, llvm::Value * pos){
     35    iBuilder->CreateStore(pos, iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
     36}
     37
     38llvm::Value * StreamSetBuffer::getComsumerPosPtr(Value * ptr) {
     39    return iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)});
     40}
     41
     42void StreamSetBuffer::setConsumerPos(Value * ptr, Value * pos){
     43    iBuilder->CreateStore(pos, iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
     44}
     45
     46llvm::Value * StreamSetBuffer::hasEndOfInputPtr(Value * ptr) {
     47    return iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)});
     48}
     49
     50void StreamSetBuffer::setEndOfInput(Value * ptr){
     51    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), 1), iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));
     52}
     53
     54llvm::Value * StreamSetBuffer::getStreamSetStructPtr(){
     55    return mStreamSetStructPtr;
     56}
    2457// Single Block Buffer
    2558
     
    2962
    3063llvm::Value * SingleBlockBuffer::allocateBuffer() {
    31     mStreamSetBufferPtr = iBuilder->CreateAlloca(mStreamSetType.getStreamSetBlockType(iBuilder));
     64    Type * const int64ty = iBuilder->getInt64Ty();
     65    Type * const int8ty = iBuilder->getInt8Ty();
     66    mStreamSetStructPtr = iBuilder->CreateAlloca(mStreamSetStructType);
     67    iBuilder->CreateStore(ConstantInt::get(int64ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
     68    iBuilder->CreateStore(ConstantInt::get(int64ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
     69    iBuilder->CreateStore(ConstantInt::get(int8ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));
     70    mStreamSetBufferPtr = iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
    3271    return mStreamSetBufferPtr;
    3372}
    3473
    3574// For a single block buffer, the block pointer is always the buffer base pointer.
    36 llvm::Value * SingleBlockBuffer::getStreamSetBlockPointer(llvm::Value * bufferBasePtr, llvm::Value * blockNo) {
    37     return bufferBasePtr;
     75llvm::Value * SingleBlockBuffer::getStreamSetBlockPointer(llvm::Value * basePtr, llvm::Value * blockNo) {
     76    return iBuilder->CreateGEP(mStreamSetType.getStreamSetBlockType(iBuilder), basePtr, {iBuilder->getInt32(0), iBuilder->getInt32(3)});
    3877}
    3978
     
    4685
    4786void ExternalUnboundedBuffer::setStreamSetBuffer(llvm::Value * ptr) {
    48     PointerType * t = PointerType::get(mStreamSetType.getStreamSetBlockType(iBuilder), mAddrSpace);
    49    
     87
     88    Type * const int64ty = iBuilder->getInt64Ty();
     89    Type * const int8ty = iBuilder->getInt8Ty();
     90
     91    PointerType * t = getStreamBufferPointerType();   
    5092    mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, t);
     93
     94    mStreamSetStructPtr = iBuilder->CreateAlloca(mStreamSetStructType);
     95    iBuilder->CreateStore(ConstantInt::get(int64ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
     96    iBuilder->CreateStore(ConstantInt::get(int64ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
     97    iBuilder->CreateStore(ConstantInt::get(int8ty, 1), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));
     98    iBuilder->CreateStore(mStreamSetBufferPtr, iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)}));
     99
    51100}
    52101
     
    55104}
    56105
    57 llvm::Value * ExternalUnboundedBuffer::allocateBuffer() {
     106llvm::Value * ExternalUnboundedBuffer::allocateBuffer() { 
    58107    throw std::runtime_error("External buffers cannot be allocated.");
    59108}
    60109
    61 llvm::Value * ExternalUnboundedBuffer::getStreamSetBlockPointer(llvm::Value * bufferBasePtr, llvm::Value * blockNo) {
    62     PointerType * t = getStreamBufferPointerType();
    63     return iBuilder->CreateGEP(iBuilder->CreatePointerBitCastOrAddrSpaceCast(bufferBasePtr, t), {blockNo});
     110llvm::Value * ExternalUnboundedBuffer::getStreamSetBlockPointer(llvm::Value * basePtr, llvm::Value * blockNo) {
     111    Value * handle = iBuilder->CreateGEP(basePtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
     112    return iBuilder->CreateGEP(iBuilder->CreateLoad(handle), {blockNo});
    64113}
    65114
     
    72121
    // CircularBuffer::allocateBuffer: emits alloca instructions for the block
    // storage and for the bookkeeping struct, initialises the struct fields, and
    // returns the block-storage pointer.
    73122llvm::Value * CircularBuffer::allocateBuffer() {
     123    Type * const int64ty = iBuilder->getInt64Ty();
     124    Type * const int8ty = iBuilder->getInt8Ty();
    // Storage for mBufferBlocks elements of the stream-set block type.
    74125    mStreamSetBufferPtr = iBuilder->CreateAlloca(mStreamSetType.getStreamSetBlockType(iBuilder), ConstantInt::get(iBuilder->getSizeTy(), mBufferBlocks));
    // One instance of mStreamSetStructType holding positions, flag and buffer pointer.
     126    mStreamSetStructPtr = iBuilder->CreateAlloca(mStreamSetStructType);
    // Producer and consumer positions both start at 0 (64-bit stores).
     127    iBuilder->CreateStore(ConstantInt::get(int64ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
     128    iBuilder->CreateStore(ConstantInt::get(int64ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
    // End-of-input flag starts at 0 here; the external-buffer setup earlier in
    // this diff stores 1 into the same field.
     129    iBuilder->CreateStore(ConstantInt::get(int8ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));
    // Record the block-storage pointer in the struct's iBuffer_ptr field.
     130    iBuilder->CreateStore(mStreamSetBufferPtr, iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)}));
     131
    // NOTE(review): the raw buffer pointer is returned, not the struct pointer,
    // while the new getStreamSetBlockPointer indexes its argument as the struct.
    // Presumably callers obtain the struct via getStreamSetStructPtr() - confirm.
    75132    return mStreamSetBufferPtr;
    76133}
    77134
    // CircularBuffer::getStreamSetBlockPointer: emits IR that yields the address
    // of a block inside the circular buffer. In both versions the index is
    // blockNo AND (mBufferBlocks - 1); the constructor shown later in this diff
    // throws unless the number of blocks is a power of 2, which is what makes
    // this mask act as a wrap-around.
    //
    // Removed version (old lines 78-79): indexed `bufferBasePtr` directly as an
    // array of stream-set blocks.
    78 llvm::Value * CircularBuffer::getStreamSetBlockPointer(llvm::Value * bufferBasePtr, llvm::Value * blockNo) {
    79     return iBuilder->CreateGEP(mStreamSetType.getStreamSetBlockType(iBuilder), bufferBasePtr, {iBuilder->CreateAnd(blockNo, ConstantInt::get(iBuilder->getSizeTy(), mBufferBlocks-1))});
    // Added version (new lines 135-137): `basePtr` is indexed as a struct; the
    // pointer stored in its iBuffer_ptr field is loaded and then indexed.
     135llvm::Value * CircularBuffer::getStreamSetBlockPointer(llvm::Value * basePtr, llvm::Value * blockNo) {
     136    Value * handle = iBuilder->CreateGEP(basePtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
     137    return iBuilder->CreateGEP(iBuilder->CreateLoad(handle), {iBuilder->CreateAnd(blockNo, ConstantInt::get(iBuilder->getSizeTy(), mBufferBlocks-1))});
    80138}
    81139
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5133 r5135  
    4747    virtual llvm::Value * getStreamSetBlockPointer(llvm::Value * bufferBasePtr, llvm::Value * blockNo) = 0;
    4848   
    49    
     // Virtual accessors added by this changeset (new lines 49-63). Their
     // definitions are not visible in this diff excerpt; presumably they address
     // the producer-position, consumer-position and end-of-input fields of the
     // stream-set struct reached through `ptr` - confirm against streamset.cpp.
     // NOTE(review): parameters use unqualified `Value *` while return types use
     // `llvm::Value *`; this relies on a using-declaration being in scope.
     49    virtual llvm::Value * getProducerPosPtr(Value * ptr);
     50
     51    virtual void setProducerPos(Value * ptr, Value * pos);
     52
     // NOTE(review): "Comsumer" looks like a misspelling of "Consumer" (compare
     // setConsumerPos below); renaming would change the public interface.
     53    virtual llvm::Value * getComsumerPosPtr(Value * ptr);
     54
     55    virtual void setConsumerPos(Value * ptr, Value * pos);
     56
     57    virtual llvm::Value * hasEndOfInputPtr(Value * ptr);
     58
     59    virtual void setEndOfInput(Value * ptr);
     60
     61    virtual llvm::PointerType * getStreamSetStructPointerType();
     62
     63    virtual llvm::Value * getStreamSetStructPtr();
    5064   
    5165protected:
     
    5973    int mAddrSpace;
    6074    llvm::Value * mStreamSetBufferPtr;
     75    llvm::Value * mStreamSetStructPtr;
     76    llvm::Type * mStreamSetStructType;
    6177
    6278};   
     
    6884   
    // SingleBlockBuffer constructor: forwards to StreamSetBuffer with
    // BufferKind::BlockBuffer. The removed version (old line 70) had an empty
    // body; the added version (new lines 86-93) also builds mStreamSetStructType
    // as a struct of { i64, i64, i8, stream-set block type }.
    // NOTE(review): the fourth field here is the block type itself, whereas the
    // other buffer constructors in this changeset use getStreamBufferPointerType()
    // for that field - confirm this difference is intentional.
    6985    SingleBlockBuffer(IDISA::IDISA_Builder * b, StreamSetType ss_type) :
    70     StreamSetBuffer(BufferKind::BlockBuffer, b, ss_type) {}
     86    StreamSetBuffer(BufferKind::BlockBuffer, b, ss_type) {
     87        mStreamSetStructType = StructType::get(getGlobalContext(),
     88                                               std::vector<Type *>({iBuilder->getInt64Ty(),
     89                                                                    iBuilder->getInt64Ty(),
     90                                                                    iBuilder->getInt8Ty(),
     91                                                                    mStreamSetType.getStreamSetBlockType(iBuilder)}));
     92
     93    }
    7194   
    7295    size_t getBufferSize() override;
     
    83106            mBufferBlocks = 0;
    84107            mAddrSpace = AddressSpace;
     108            mStreamSetStructType = StructType::get(getGlobalContext(),
     109                                                   std::vector<Type *>({iBuilder->getInt64Ty(),
     110                                                                        iBuilder->getInt64Ty(),
     111                                                                        iBuilder->getInt8Ty(),
     112                                                                        getStreamBufferPointerType()}));
    85113        }
    86114    llvm::PointerType * getStreamBufferPointerType() override;
     
    107135                throw std::runtime_error("CircularStreamSetBuffer: number of blocks must be a power of 2!");
    108136            }
     137            mStreamSetStructType = StructType::get(getGlobalContext(),
     138                                                   std::vector<Type *>({iBuilder->getInt64Ty(),
     139                                                                        iBuilder->getInt64Ty(),
     140                                                                        iBuilder->getInt8Ty(),
     141                                                                        getStreamBufferPointerType()}));
     142 
    109143        }
    110144
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r5117 r5135  
    // Code-generation settings. Each global is the storage target of the
    // matching cl::opt below, bound through cl::location.
    5353int BlockSize;
    5454int SegmentSize;
    // Added in this changeset; read by grepCodeGen as codegen::BufferSegments.
     55int BufferSegments;
    5556
    5657static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
    5758static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
    // -buffer-segments defaults to 1. The value description says "positive
    // integer", but no range check is visible here.
     59static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
    5860
    5961const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
  • icGREP/icgrep-devel/icgrep/toolchain.h

    r5108 r5135  
    1919extern int BlockSize;  // set from command line
    2020extern int SegmentSize;  // set from command line
     21extern int BufferSegments;  // set from command line (buffer-segments option)
    2122
    2223}
Note: See TracChangeset for help on using the changeset viewer.