Changeset 5263


Ignore:
Timestamp:
Jan 17, 2017, 12:00:43 PM (10 months ago)
Author:
cameron
Message:

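New doSegment partial progress: doSegment now takes a doFinal flag plus one available-item count per input stream set, and the separate finalSegment method is removed. Call sites marked "needs positions" do not yet pass the per-input counts.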

Location:
icGREP/icgrep-devel/icgrep
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5260 r5263  
    8787#if(IWYU_PATH)
    8888#cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
    89 #execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-libgcc-file-name OUTPUT_VARIABLE LIBGCC_FILE)
    90 #get_filename_component(LIBGCC_PATH ${LIBGCC_FILE} DIRECTORY)
    91 #include_directories("${LIBGCC_PATH}/include")
     89execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-libgcc-file-name OUTPUT_VARIABLE LIBGCC_FILE)
     90get_filename_component(LIBGCC_PATH ${LIBGCC_FILE} DIRECTORY)
     91include_directories("${LIBGCC_PATH}/include")
    9292#set_property(TARGET CodeGen PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
    9393#set_property(TARGET PabloADT PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5260 r5263  
    8282
    8383    // Create the doSegment function prototype.
    84     std::vector<Type *> doSegmentParameters = {selfType, iBuilder->getSizeTy()};
     84    std::vector<Type *> doSegmentParameters = {selfType, iBuilder->getInt1Ty()};
     85    for (auto ss : mStreamSetInputs) {
     86        doSegmentParameters.push_back(iBuilder->getSizeTy());
     87    }
    8588    FunctionType * doSegmentFunctionType = FunctionType::get(iBuilder->getVoidTy(), doSegmentParameters, false);
    8689    std::string doSegmentName = mKernelName + doSegment_suffix;
     
    9295    arg->setName("self");
    9396    arg = &*(args++);
    94     arg->setName("blockCnt");
     97    arg->setName("doFinal");
     98    for (auto ss : mStreamSetInputs) {
     99        arg = &*(args++);
     100        arg->setName(ss.name + "_availableItems");
     101    }
    95102    doSegmentFn->setDoesNotCapture(1); // for self parameter only.
    96     //
    97     // Create the finalSegment function prototype.
    98     std::vector<Type *> finalSegmentParameters = {selfType, iBuilder->getSizeTy()};
    99     FunctionType * finalSegmentFunctionType = FunctionType::get(iBuilder->getVoidTy(), finalSegmentParameters, false);
    100     std::string finalSegmentName = mKernelName + finalSegment_suffix;
    101     Function * finalSegmentFn = Function::Create(finalSegmentFunctionType, GlobalValue::ExternalLinkage, finalSegmentName, client);
    102     finalSegmentFn->setCallingConv(CallingConv::C);
    103     finalSegmentFn->setDoesNotThrow();
    104     Function::arg_iterator finalSegmentArgs = finalSegmentFn->arg_begin();
    105     Value * finalSegmentArg = &*(finalSegmentArgs++);
    106     finalSegmentArg->setName("self");
    107     finalSegmentArg = &*(finalSegmentArgs++);
    108     finalSegmentArg->setName("blockCnt");
    109     finalSegmentFn->setDoesNotCapture(1); // for self parameter only.
    110103    iBuilder->setModule(saveModule);
    111104    iBuilder->restoreIP(savePoint);
     
    139132
    140133
    141 Value * KernelInterface::createDoSegmentCall(Value * self, Value * blksToDo) const {
     134Value * KernelInterface::createDoSegmentCall(std::vector<Value *> args) const {
    142135    Module * m = iBuilder->getModule();
    143136    std::string fnName = mKernelName + doSegment_suffix;
     
    146139        throw std::runtime_error("Cannot find " + fnName);
    147140    }
    148     return iBuilder->CreateCall(method, {self, blksToDo});
    149 }
    150 
    151 Value * KernelInterface::createFinalSegmentCall(Value * self, Value * blksToDo) const {
    152     Module * m = iBuilder->getModule();
    153     std::string fnName = mKernelName + finalSegment_suffix;
    154     Function * method = m->getFunction(fnName);
    155     if (!method) {
    156         throw std::runtime_error("Cannot find " + fnName);
    157     }
    158     return iBuilder->CreateCall(method, {self, blksToDo});
     141    return iBuilder->CreateCall(method, args);
    159142}
    160143
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5260 r5263  
    2525static const std::string doBlock_suffix = "_DoBlock";
    2626static const std::string doSegment_suffix = "_DoSegment";
    27 static const std::string finalSegment_suffix = "_FinalSegment";
    2827static const std::string finalBlock_suffix = "_FinalBlock";
    2928static const std::string accumulator_infix = "_get_";
     
    4140     */
    4241   
    43     std::string getName() { return mKernelName;}
     42    std::string getName() const { return mKernelName;}
    4443       
    4544    std::vector<Binding> getStreamInputs() {return mStreamSetInputs;}
     
    5554    llvm::Value * getInstance() const { return mKernelInstance; }
    5655
    57     llvm::Value * createDoSegmentCall(llvm::Value * self, llvm::Value * blkCount) const;
    58     llvm::Value * createFinalSegmentCall(llvm::Value * self, llvm::Value * blkCount) const;
     56    llvm::Value * createDoSegmentCall(std::vector<llvm::Value *> args) const;
    5957    llvm::Value * createFinalBlockCall(llvm::Value * self, llvm::Value * remainingBytes) const;
    6058    llvm::Value * createGetAccumulatorCall(llvm::Value * self, std::string accumName) const;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5261 r5263  
    120120    generateFinalBlockMethod(); // possibly overridden by the KernelBuilder subtype
    121121    generateDoSegmentMethod();
    122     generateFinalSegmentMethod();
    123122
    124123    // Implement the accumulator get functions
     
    132131        iBuilder->CreateRet(retVal);
    133132    }
    134     generateInitMethod();
    135133    iBuilder->restoreIP(savePoint);
    136134}
     
    166164    /* Skip "remaining" arg */ args++;
    167165    std::vector<Value *> doBlockArgs = {self};
    168     while (args != finalBlockFunction->arg_end()){
    169         doBlockArgs.push_back(&*args++);
    170     }
    171166    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
    172167    iBuilder->CreateRetVoid();
     
    188183    Module * m = iBuilder->getModule();
    189184    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
    190     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
     185    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), mKernelName + "_entry", doSegmentFunction, 0));
    191186    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    192     BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), "strideLoopCond", doSegmentFunction, 0);
    193     BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), "strideLoopBody", doSegmentFunction, 0);
    194     BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", doSegmentFunction, 0);
    195     BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0);
    196     BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), "finalExit", doSegmentFunction, 0);
     187    BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_strideLoopCond", doSegmentFunction, 0);
     188    BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_strideLoopBody", doSegmentFunction, 0);
     189    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_stridesDone", doSegmentFunction, 0);
     190    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_doFinalBlock", doSegmentFunction, 0);
     191    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_segmentDone", doSegmentFunction, 0);
     192    BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_finalExit", doSegmentFunction, 0);
    197193    Type * const size_ty = iBuilder->getSizeTy();
    198194    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
     
    201197    Function::arg_iterator args = doSegmentFunction->arg_begin();
    202198    Value * self = &*(args++);
    203     Value * blocksToDo = &*(args);
    204    
    205     std::vector<Value *> inbufProducerPtrs;
    206     std::vector<Value *> endSignalPtrs;
    207     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    208         Value * param = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
    209         inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(param));
    210         endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(param));
    211     }
     199    Value * doFinal = &*(args++);
    212200   
    213201    std::vector<Value *> producerPos;
    214     /* Determine the actually available data examining all input stream sets. */
    215     LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[0]);
    216     producerPos.push_back(p);
     202    producerPos.push_back(&*(args++));
    217203    Value * availablePos = producerPos[0];
    218     for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
    219         LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
     204    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
     205        Value * p = &*(args++);
    220206        producerPos.push_back(p);
    221         /* Set the available position to be the minimum of availablePos and producerPos. */
    222207        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
    223208    }
    224209    Value * processed = getProcessedItemCount(self, mStreamSetInputs[0].name);
    225210    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
    226 //#ifndef NDEBUG
    227 //    iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail);
    228 //#endif
    229     Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
    230     Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
    231     /* Adjust the number of full blocks to do, based on the available data, if necessary. */
    232     Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo);
    233     stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo);
    234     //iBuilder->CallPrintInt(mKernelName + "_stridesAvail", stridesAvail);
     211    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
    235212    iBuilder->CreateBr(strideLoopCond);
    236213
     
    250227   
    251228    iBuilder->SetInsertPoint(stridesDone);
    252    
     229    // Update counts for the full strides processed.
    253230    Value * segmentItemsProcessed = iBuilder->CreateMul(stridesToDo, stride);
    254231    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     
    262239        }
    263240    }
     241   
     242    // Now conditionally perform the final block processing depending on the doFinal parameter.
     243    iBuilder->CreateCondBr(doFinal, doFinalBlock, segmentDone);
     244    iBuilder->SetInsertPoint(doFinalBlock);
     245
     246    Value * remainingItems = iBuilder->CreateSub(producerPos[0], processed);
     247    //iBuilder->CallPrintInt(mKernelName + " remainingItems", remainingItems);
     248   
     249    createFinalBlockCall(self, remainingItems);
     250    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     251        Value * preProcessed = getProcessedItemCount(self, mStreamSetInputs[i].name);
     252        setProcessedItemCount(self, mStreamSetInputs[i].name, iBuilder->CreateAdd(preProcessed, remainingItems));
     253    }
     254    if (!mDoBlockUpdatesProducedItemCountsAttribute) {
     255        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     256            Value * preProduced = getProducedItemCount(self, mStreamSetOutputs[i].name);
     257            setProducedItemCount(self, mStreamSetOutputs[i].name, iBuilder->CreateAdd(preProduced, remainingItems));
     258        }
     259    }
    264260    iBuilder->CreateBr(segmentDone);
     261   
    265262    iBuilder->SetInsertPoint(segmentDone);
    266263//#ifndef NDEBUG
     
    280277}
    281278
    282 void KernelBuilder::generateFinalSegmentMethod() const {
    283     auto savePoint = iBuilder->saveIP();
    284     Module * m = iBuilder->getModule();
    285     Function * finalSegmentFunction = m->getFunction(mKernelName + finalSegment_suffix);
    286     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", finalSegmentFunction, 0));
    287     BasicBlock * doStrides = BasicBlock::Create(iBuilder->getContext(), "doStrides", finalSegmentFunction, 0);
    288     BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", finalSegmentFunction, 0);
    289     Type * const size_ty = iBuilder->getSizeTy();
    290     Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
    291     Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
    292     Function::arg_iterator args = finalSegmentFunction->arg_begin();
    293     Value * self = &*(args++);
    294     Value * blocksToDo = &*(args);
    295     std::vector<Value *> inbufProducerPtrs;
    296     std::vector<Value *> endSignalPtrs;
    297     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    298         Value * param = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
    299         inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(param));
    300         endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(param));
    301     }
    302    
    303     std::vector<Value *> producerPos;
    304     /* Determine the actually available data examining all input stream sets. */
    305     LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[0]);
    306     producerPos.push_back(p);
    307     Value * availablePos = producerPos[0];
    308     for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
    309         LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
    310         producerPos.push_back(p);
    311         /* Set the available position to be the minimum of availablePos and producerPos. */
    312         availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
    313     }
    314     Value * processed = getProcessedItemCount(self, mStreamSetInputs[0].name);
    315     Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
    316 //#ifndef NDEBUG
    317 //    iBuilder->CallPrintInt(mKernelName + "_itemsAvail final", itemsAvail);
    318 //#endif
    319     Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
    320     Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
    321     /* Adjust the number of full blocks to do, based on the available data, if necessary. */
    322     Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo);
    323     stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo);
    324     Value * notDone = iBuilder->CreateICmpUGT(stridesToDo, ConstantInt::get(size_ty, 0));
    325     iBuilder->CreateCondBr(notDone, doStrides, stridesDone);
    326    
    327     iBuilder->SetInsertPoint(doStrides);
    328     createDoSegmentCall(self, blocksToDo);
    329     iBuilder->CreateBr(stridesDone);
    330    
    331     iBuilder->SetInsertPoint(stridesDone);
    332     /* Now at most a partial block remains. */
    333    
    334     processed = getProcessedItemCount(self, mStreamSetInputs[0].name);   
    335     Value * remainingItems = iBuilder->CreateSub(producerPos[0], processed);
    336     //iBuilder->CallPrintInt(mKernelName + " remainingItems", remainingItems);
    337        
    338     createFinalBlockCall(self, remainingItems);
    339    
    340     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    341         Value * preProcessed = getProcessedItemCount(self, mStreamSetInputs[i].name);
    342         setProcessedItemCount(self, mStreamSetInputs[i].name, iBuilder->CreateAdd(preProcessed, remainingItems));
    343     }
    344     if (!mDoBlockUpdatesProducedItemCountsAttribute) {
    345         for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    346             Value * preProduced = getProducedItemCount(self, mStreamSetOutputs[i].name);
    347             setProducedItemCount(self, mStreamSetOutputs[i].name, iBuilder->CreateAdd(preProduced, remainingItems));
    348         }
    349     }
    350 //#ifndef NDEBUG
    351 //    iBuilder->CallPrintInt(mKernelName + "_processed final", processed);
    352 //#endif
    353     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    354         Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
    355         Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    356         Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
    357         iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
    358     }
    359 
    360     iBuilder->CreateRetVoid();
    361 
    362     iBuilder->restoreIP(savePoint);
    363 }
    364 
    365 
    366279
    367280ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
     
    520433    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    521434    Type * const int1ty = iBuilder->getInt1Ty();
    522 
     435   
    523436    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
    524437    threadFunc->setCallingConv(CallingConv::C);
    525438    Function::arg_iterator args = threadFunc->arg_begin();
    526 
     439   
    527440    Value * const arg = &*(args++);
    528441    arg->setName("args");
    529 
     442   
    530443    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
    531 
     444   
    532445    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
    533 
     446   
    534447    std::vector<Value *> inbufProducerPtrs;
    535448    std::vector<Value *> inbufConsumerPtrs;
     
    537450    std::vector<Value *> outbufConsumerPtrs;   
    538451    std::vector<Value *> endSignalPtrs;
    539 
     452   
    540453    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    541454        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
     
    549462        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
    550463    }
    551 
     464   
    552465    const unsigned segmentBlocks = codegen::SegmentSize;
    553466    const unsigned bufferSegments = codegen::BufferSegments;
    554467    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
    555468    Type * const size_ty = iBuilder->getSizeTy();
    556 
     469   
    557470    Value * segSize = ConstantInt::get(size_ty, segmentSize);
    558471    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
    559     Value * segBlocks = ConstantInt::get(size_ty, segmentBlocks);
    560    
     472   
    561473    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
    562474    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
     
    565477    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
    566478    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
    567     BasicBlock * doFinalSegBlock = BasicBlock::Create(iBuilder->getContext(), "doFinalSeg", threadFunc, 0);
    568     BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), "doFinal", threadFunc, 0);
    569 
     479   
    570480    iBuilder->CreateBr(outputCheckBlock);
    571 
     481   
    572482    iBuilder->SetInsertPoint(outputCheckBlock);
    573 
     483   
    574484    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
    575485    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
     
    582492   
    583493    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock);
    584 
     494   
    585495    iBuilder->SetInsertPoint(inputCheckBlock);
    586 
     496   
    587497    Value * requiredSize = segSize;
    588498    if (mLookAheadPositions > 0) {
     
    597507        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
    598508    }
    599 
     509   
    600510    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
    601    
     511    
    602512    iBuilder->SetInsertPoint(endSignalCheckBlock);
    603513   
     
    607517        iBuilder->CreateAnd(endSignal, endSignal_next);
    608518    }
    609        
     519   
    610520    iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
    611521   
    612522    iBuilder->SetInsertPoint(doSegmentBlock);
    613  
    614     createDoSegmentCall(self, segBlocks);
    615 
     523   
     524    // needs positions
     525    createDoSegmentCall({self, ConstantInt::getNullValue(iBuilder->getInt1Ty())});
     526   
    616527    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
    617528        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
     
    628539        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
    629540        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
    630 
     541       
    631542        iBuilder->SetInsertPoint(earlyEndBlock);
    632543        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     
    636547    }
    637548    iBuilder->CreateBr(outputCheckBlock);
    638      
     549   
    639550    iBuilder->SetInsertPoint(endBlock);
    640551    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
    641552    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
    642553    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
    643     Value * blockSize = ConstantInt::get(size_ty, iBuilder->getBitBlockWidth());
    644     Value * blocks = iBuilder->CreateUDiv(remainingBytes, blockSize);
    645     Value * finalBlockRemainingBytes = iBuilder->CreateURem(remainingBytes, blockSize);
    646 
    647     iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(blocks, ConstantInt::get(size_ty, 0)), doFinalBlock, doFinalSegBlock);
    648 
    649     iBuilder->SetInsertPoint(doFinalSegBlock);
    650 
    651     createDoSegmentCall(self, blocks);
    652 
    653     iBuilder->CreateBr(doFinalBlock);
    654 
    655     iBuilder->SetInsertPoint(doFinalBlock);
    656 
    657     createFinalBlockCall(self, finalBlockRemainingBytes);
    658 
     554   
     555        // needs positions
     556    createDoSegmentCall({self, ConstantInt::getAllOnesValue(iBuilder->getInt1Ty())});
     557   
     558   
    659559    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
    660560        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
     
    664564        iBuilder->CreateAtomicStoreRelease(producerPos, outbufProducerPtrs[i]);
    665565    }
    666 
     566   
    667567    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    668568        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    669569        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
    670570    }
    671 
     571   
    672572    iBuilder->CreatePThreadExitCall(Constant::getNullValue(voidPtrTy));
    673573    iBuilder->CreateRetVoid();
    674 
     574   
    675575    return threadFunc;
    676 
     576   
    677577}
    678578
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5261 r5263  
    132132    virtual void generateDoSegmentMethod() const;
    133133   
    134     virtual void generateFinalSegmentMethod() const;
    135    
    136134    // Add an additional scalar field to the KernelState struct.
    137135    // Must occur before any call to addKernelDeclarations or createKernelModule.
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.cpp

    r5260 r5263  
    2929   
    3030    Function::arg_iterator args = doSegmentFunction->arg_begin();
    31     Value * self = &*(args);
     31    Value * self = &*(args++);
    3232   
    3333    Value * fileItems = getScalarField(self, "fileSize");
     
    5555}
    5656
    57 void MMapSourceKernel::generateFinalSegmentMethod() const {
    58     auto savePoint = iBuilder->saveIP();
    59     Module * m = iBuilder->getModule();
    60     Function * finalSegmentFunction = m->getFunction(mKernelName + finalSegment_suffix);
    61     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", finalSegmentFunction, 0));
    62        
    63     Function::arg_iterator args = finalSegmentFunction->arg_begin();
    64     Value * self = &*(args++);
    65     Value * blocksToDo = &*(args);
    66    
    67     createDoSegmentCall(self, blocksToDo);
    68 
    69     iBuilder->CreateRetVoid();
    70     iBuilder->restoreIP(savePoint);
    71 }
    72 
    73 
    7457// The doBlock method is deprecated.   But in case it is used, just call doSegment with
    7558// 1 as the number of blocks to do.
     
    8164    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
    8265    Value * self = getParameter(doBlockFunction, "self");
    83     iBuilder->CreateCall(doSegmentFunction, {self, iBuilder->getSize(1)});
     66    iBuilder->CreateCall(doSegmentFunction, {self, ConstantInt::getNullValue(iBuilder->getInt1Ty())});
    8467    iBuilder->CreateRetVoid();
    8568    iBuilder->restoreIP(savePoint);
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.h

    r5260 r5263  
    2525    void generateDoBlockMethod() const override;
    2626    void generateDoSegmentMethod() const override;
    27     void generateFinalSegmentMethod() const override;
    2827   
    2928};
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5260 r5263  
    99#include <kernels/interface.h>
    1010#include <kernels/kernel.h>
    11 #include <kernels/s2p_kernel.h>
    1211#include <iostream>
    1312#include <unordered_map>
     
    1716using namespace llvm;
    1817
    19 #if 0
    20 
    21 using BufferMap = std::unordered_map<StreamSetBuffer *, std::pair<KernelBuilder *, unsigned>>;
    22 
    23 static void createStreamBufferMap(BufferMap & bufferMap, const std::vector<KernelBuilder *> & kernels) {
    24     for (auto k: kernels) {
    25         auto outputSets = k->getStreamSetOutputBuffers();
    26         for (unsigned i = 0; i < outputSets.size(); i++) {
    27             bufferMap.insert(std::make_pair(outputSets[i], std::make_pair(k, i)));
    28         }
    29     }
    30     for (auto k: kernels) {
    31         auto inputSets = k->getStreamSetInputBuffers();
     18using ProducerTable = std::vector<std::vector<std::pair<unsigned, unsigned>>>;
     19
     20ProducerTable createProducerTable(const std::vector<KernelBuilder *> & kernels) {
     21    ProducerTable producerTable;
     22    producerTable.reserve(kernels.size());
     23   
     24    std::vector<std::vector<bool>> userTable;
     25    userTable.reserve(kernels.size());
     26   
     27    // First prepare a map from streamSet output buffers to their producing kernel and output index.
     28    std::unordered_map<const StreamSetBuffer *, std::pair<unsigned, unsigned>> bufferMap;
     29   
     30    for (unsigned k = 0; k < kernels.size(); k++) {
     31        auto outputSets = kernels[k]->getStreamSetOutputBuffers();
     32        for (unsigned j = 0; j < outputSets.size(); j++) {
     33            userTable[k].push_back(false);
     34            bufferMap.insert(std::make_pair(outputSets[j], std::make_pair(k, j)));
     35        }
     36    }
     37    for (unsigned k = 0; k < kernels.size(); k++) {
     38        auto inputSets = kernels[k]->getStreamSetInputBuffers();
    3239        for (unsigned i = 0; i < inputSets.size(); i++) {
    33             if (bufferMap.find(inputSets[i]) == bufferMap.end()) {
    34                 llvm::report_fatal_error("Pipeline error: input buffer #" + std::to_string(i) + " of " + k->getName() + ": no corresponding output buffer. ");
     40            auto f = bufferMap.find(inputSets[i]);
     41            if (f == bufferMap.end()) {
     42                llvm::report_fatal_error("Pipeline error: input buffer #" + std::to_string(i) + " of " + kernels[k]->getName() + ": no corresponding output buffer. ");
    3543            }
    36         }
    37     }
    38 }
    39 
    40 static Value * getSegmentBlocks(BufferMap & bufferMap, KernelBuilder * kernel) {
    41     IDISA::IDISA_Builder * iBuilder = kernel->getBuilder();
    42     std::cerr << "getSegmentBlocks\n";
    43 
    44     KernelBuilder * sourceKernel;
    45 
    46     unsigned outputIndex;
    47     auto inputs = kernel->getStreamSetInputBuffers();
    48     if (inputs.empty()) return iBuilder->getSize(codegen::SegmentSize * iBuilder->getStride() / iBuilder->getBitBlockWidth());
    49     std::string inputSetName = kernel->getStreamInputs()[0].name;
    50     std::cerr << "inputSetName = " << inputSetName << "\n";
    51     auto f = bufferMap.find(inputs[0]);
    52     assert(f != bufferMap.end()  && "bufferMap failure");
    53     std::tie(sourceKernel, outputIndex) = f->second;
    54     std::cerr << "outputIndex = " << outputIndex << "\n";
    55     Value * produced = sourceKernel->getProducedItemCount(sourceKernel->getInstance(), sourceKernel->getStreamOutputs()[outputIndex].name);
    56     iBuilder->CallPrintInt("produced", produced);
    57     Value * processed = kernel->getProcessedItemCount(kernel->getInstance(), inputSetName);
    58     iBuilder->CallPrintInt("processed", processed);
    59     Value * itemsToDo = iBuilder->CreateSub(produced, processed);
    60     return iBuilder->CreateUDiv(itemsToDo, iBuilder->getSize(iBuilder->getStride()));
    61 }
    62 
    63 #endif
    64 
    65 Function * generateSegmentParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels, Type * sharedStructType, int id) {
    66 
     44            producerTable[k].push_back(f->second);
     45            unsigned sourceKernel, outputIndex;
     46            std::tie(sourceKernel, outputIndex) = f->second;
     47            if (sourceKernel >= k) {
     48                llvm::report_fatal_error("Pipeline error: input buffer #" + std::to_string(i) + " of " + kernels[k]->getName() + ": not defined before use. ");
     49            }
     50            //errs() << "sourceKernel: " + std::to_string(sourceKernel) + ", outputIndex: " + std::to_string(outputIndex) + ", user: " + std::to_string(k) + "\n";
     51            userTable[sourceKernel][outputIndex]= true;
     52           
     53        }
     54    }
     55    for (unsigned k = 0; k < kernels.size(); k++) {
     56        auto outputSets = kernels[k]->getStreamSetOutputBuffers();
     57        //errs() << "kernel: " + kernels[k]->getName() + "\n";
     58        for (unsigned j = 0; j < outputSets.size(); j++) {
     59            if (userTable[k][j] == false) {
     60                llvm::report_fatal_error("Pipeline error: output buffer #" + std::to_string(j) + " of " + kernels[k]->getName() + ": no users. ");
     61            }
     62        }
     63    }
     64    return producerTable;
     65}
     66
     67
     68Function * generateSegmentParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels, Type * sharedStructType, ProducerTable & producerTable, int id) {
     69   
     70    // ProducerPos[k][i] will hold the producedItemCount of the i^th output stream
     71    // set of the k^th kernel.  These values will be loaded immediately after the
     72    // doSegment and finalSegment calls for kernel k and later used as the
     73    // producer position arguments for later doSegment/finalSegment calls.
     74   
     75    std::vector<std::vector<Value *>> ProducerPos;
     76   
     77   
     78    const auto ip = iBuilder->saveIP();
     79   
    6780    Module * m = iBuilder->getModule();
    6881    Type * const size_ty = iBuilder->getSizeTy();
     
    87100    std::vector<BasicBlock *> segmentWait;
    88101    std::vector<BasicBlock *> segmentLoopBody;
    89     std::vector<BasicBlock *> partialSegmentWait;
    90     std::vector<BasicBlock *> partialSegmentLoopBody;
    91     bool terminationSignalEncountered = false;
    92102    for (unsigned i = 0; i < kernels.size(); i++) {
    93103        std::string kname = kernels[i]->getName();
    94104        segmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), kname + "Wait", threadFunc, 0));
    95105        segmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), "do_" + kname, threadFunc, 0));
    96         if (terminationSignalEncountered) {
    97             partialSegmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), kname + "WaitFinal", threadFunc, 0));
    98             partialSegmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), "finish_" + kname, threadFunc, 0));
    99         }
    100         else {
    101             partialSegmentWait.push_back(nullptr);
    102             partialSegmentLoopBody.push_back(nullptr);
    103             terminationSignalEncountered = kernels[i]->hasNoTerminateAttribute() == false;
    104         }
    105     }
    106     segmentWait.push_back(segmentLoop); // If the last kernel does not terminate, loop back.
    107     partialSegmentWait.push_back(exitThreadBlock); // After the last kernel terminates, we're done.
     106    }
    108107
    109108    iBuilder->SetInsertPoint(entryBlock);
     
    116115    }
    117116   
    118     // Some important constant values.
    119     int segmentSize = codegen::SegmentSize;
    120     Constant * segmentBlocks = ConstantInt::get(size_ty, segmentSize);
    121117    iBuilder->CreateBr(segmentLoop);
    122118
     
    128124    Value * alreadyDone = kernels[last_kernel]->getTerminationSignal(instancePtrs[last_kernel]);
    129125    iBuilder->CreateCondBr(alreadyDone, exitThreadBlock, segmentWait[0]);
    130 
    131    
    132    
    133     for (unsigned i = 0; i < kernels.size(); i++) {
    134         iBuilder->SetInsertPoint(segmentWait[i]);
    135         Value * processedSegmentCount = kernels[i]->acquireLogicalSegmentNo(instancePtrs[i]);
     126   
     127    Value * doFinal = ConstantInt::getNullValue(iBuilder->getInt1Ty());
     128
     129    for (unsigned k = 0; k < kernels.size(); k++) {
     130        iBuilder->SetInsertPoint(segmentWait[k]);
     131        Value * processedSegmentCount = kernels[k]->acquireLogicalSegmentNo(instancePtrs[k]);
    136132        Value * cond = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
    137         iBuilder->CreateCondBr(cond, segmentLoopBody[i], segmentWait[i]);
    138 
    139         iBuilder->SetInsertPoint(segmentLoopBody[i]);
    140         if (i == last_kernel) {
     133        iBuilder->CreateCondBr(cond, segmentLoopBody[k], segmentWait[k]);
     134       
     135        iBuilder->SetInsertPoint(segmentLoopBody[k]);
     136        if (k == last_kernel) {
    141137            segNo->addIncoming(iBuilder->CreateAdd(segNo, ConstantInt::get(size_ty, threadNum)), segmentLoopBody[last_kernel]);
    142138        }
    143         kernels[i]->createDoSegmentCall(instancePtrs[i], segmentBlocks);
    144         if (kernels[i]->hasNoTerminateAttribute()) {
    145             kernels[i]->releaseLogicalSegmentNo(instancePtrs[i], nextSegNo);
    146             iBuilder->CreateBr(segmentWait[i+1]);
     139       
     140       
     141       
     142       
     143        std::vector<Value *> doSegmentArgs = {instancePtrs[k], doFinal};
     144        for (unsigned j = 0; j < kernels[k]->getStreamInputs().size(); j++) {
     145            unsigned producerKernel, outputIndex;
     146            std::tie(producerKernel, outputIndex) = producerTable[k][j];
     147            doSegmentArgs.push_back(ProducerPos[producerKernel][outputIndex]);
     148        }
     149        kernels[k]->createDoSegmentCall(doSegmentArgs);
     150        std::vector<Value *> produced;
     151        for (unsigned i = 0; i < kernels[k]->getStreamOutputs().size(); i++) {
     152            produced.push_back(kernels[k]->getProducedItemCount(instancePtrs[k], kernels[k]->getStreamOutputs()[i].name));
     153        }
     154        ProducerPos.push_back(produced);
     155        if (! (kernels[k]->hasNoTerminateAttribute())) {
     156            Value * terminated = kernels[k]->getTerminationSignal(instancePtrs[k]);
     157            doFinal = iBuilder->CreateOr(doFinal, terminated);
     158        }
     159        kernels[k]->releaseLogicalSegmentNo(instancePtrs[k], nextSegNo);
     160        if (k == last_kernel) {
     161            iBuilder->CreateCondBr(doFinal, exitThreadBlock, segmentLoop);
    147162        }
    148163        else {
    149             Value * terminated = kernels[i]->getTerminationSignal(instancePtrs[i]);
    150             kernels[i]->releaseLogicalSegmentNo(instancePtrs[i], nextSegNo);
    151             iBuilder->CreateCondBr(terminated, partialSegmentWait[i+1], segmentWait[i+1]);
    152         }
    153         if (partialSegmentWait[i] != nullptr) {
    154             iBuilder->SetInsertPoint(partialSegmentWait[i]);
    155             Value * processedSegmentCount = kernels[i]->acquireLogicalSegmentNo(instancePtrs[i]);
    156             Value * cond = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
    157             iBuilder->CreateCondBr(cond, partialSegmentLoopBody[i], partialSegmentWait[i]);
    158            
    159             iBuilder->SetInsertPoint(partialSegmentLoopBody[i]);
    160             kernels[i]->createFinalSegmentCall(instancePtrs[i], segmentBlocks);
    161             kernels[i]->releaseLogicalSegmentNo(instancePtrs[i], nextSegNo);
    162             iBuilder->CreateBr(partialSegmentWait[i+1]);
     164            iBuilder->CreateBr(segmentWait[k+1]);
    163165        }
    164166    }
     
    168170    iBuilder->CreatePThreadExitCall(nullVal);
    169171    iBuilder->CreateRetVoid();
     172    iBuilder->restoreIP(ip);
    170173
    171174    return threadFunc;
     
    182185   
    183186    unsigned threadNum = codegen::ThreadNum;
    184 
     187   
    185188    Module * m = iBuilder->getModule();
    186 
     189   
    187190    Type * const size_ty = iBuilder->getSizeTy();
    188191    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
    189192    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    190 
     193   
    191194    for (auto k : kernels) k->createInstance();
    192 
     195   
     196    ProducerTable producerTable = createProducerTable(kernels);
     197   
    193198    Type * const pthreadsTy = ArrayType::get(size_ty, threadNum);
    194199    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
     
    199204    Value * nullVal = Constant::getNullValue(voidPtrTy);
    200205    AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
    201 
     206   
    202207    std::vector<Type *> structTypes;
    203208    for (unsigned i = 0; i < kernels.size(); i++) {
     
    205210    }
    206211    Type * sharedStructType = StructType::get(m->getContext(), structTypes);
    207 
     212   
    208213    AllocaInst * sharedStruct = iBuilder->CreateAlloca(sharedStructType);
    209214    for (unsigned i = 0; i < kernels.size(); i++) {
     
    211216        iBuilder->CreateStore(kernels[i]->getInstance(), ptr);
    212217    }
    213 
     218   
    214219    std::vector<Function *> thread_functions;
    215220    const auto ip = iBuilder->saveIP();
    216221    for (unsigned i = 0; i < threadNum; i++) {
    217         thread_functions.push_back(generateSegmentParallelPipelineThreadFunction("thread"+std::to_string(i), iBuilder, kernels, sharedStructType, i));
     222        thread_functions.push_back(generateSegmentParallelPipelineThreadFunction("thread"+std::to_string(i), iBuilder, kernels, sharedStructType, producerTable, i));
    218223    }
    219224    iBuilder->restoreIP(ip);
    220 
     225   
    221226    for (unsigned i = 0; i < threadNum; i++) {
    222227        iBuilder->CreatePThreadCreateCall(pthreadsPtrs[i], nullVal, thread_functions[i], iBuilder->CreateBitCast(sharedStruct, int8PtrTy));
    223228    }
    224 
     229   
    225230    std::vector<Value *> threadIDs;
    226231    for (unsigned i = 0; i < threadNum; i++) {
     
    231236        iBuilder->CreatePThreadJoinCall(threadIDs[i], status);
    232237    }
    233 
     238   
    234239}
    235240
    236241void generatePipelineParallel(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels) {
    237  
     242    
    238243    Type * pthreadTy = iBuilder->getSizeTy();
    239244    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
    240245    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    241 
     246   
    242247    Type * const pthreadsTy = ArrayType::get(pthreadTy, kernels.size());
    243 
     248   
    244249    for (auto k : kernels) k->createInstance();
    245 
     250   
    246251    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
    247252    std::vector<Value *> pthreadsPtrs;
     
    251256    Value * nullVal = Constant::getNullValue(voidPtrTy);
    252257    AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
    253 
     258   
    254259    std::vector<Function *> kernel_functions;
    255260    const auto ip = iBuilder->saveIP();
     
    258263    }
    259264    iBuilder->restoreIP(ip);
    260 
     265   
    261266    for (unsigned i = 0; i < kernels.size(); i++) {
    262267        iBuilder->CreatePThreadCreateCall(pthreadsPtrs[i], nullVal, kernel_functions[i], iBuilder->CreateBitCast(kernels[i]->getInstance(), int8PtrTy));
    263268    }
    264 
     269   
    265270    std::vector<Value *> threadIDs;
    266271    for (unsigned i = 0; i < kernels.size(); i++) {
     
    276281void generatePipelineLoop(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels) {
    277282    for (auto k : kernels) k->createInstance();
    278     //BufferMap bufferMap;
    279     //createStreamBufferMap(bufferMap, kernels);
    280283   
    281284    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    282285    Function * main = entryBlock->getParent();
    283 
    284     // Create the basic blocks
     286   
     287    // Create the basic blocks for the loop.
    285288    BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", main, 0);
    286289    BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exitBlock", main, 0);
    287     // We create vectors of loop body and final segment blocks indexed by kernel.
    288     std::vector<BasicBlock *> loopBodyBlocks;
    289     std::vector<BasicBlock *> finalSegmentBlocks;
    290 
    291     loopBodyBlocks.push_back(segmentLoop);
    292     finalSegmentBlocks.push_back(nullptr); 
    293    
    294     for (unsigned i = 1; i < kernels.size(); i++) {
    295         if (kernels[i-1]->hasNoTerminateAttribute()) {
    296             // Previous kernel cannot terminate.   Continue with the previous blocks;
    297             loopBodyBlocks.push_back(loopBodyBlocks.back());
    298             finalSegmentBlocks.push_back(finalSegmentBlocks.back());
    299         }
    300         else {
    301             loopBodyBlocks.push_back(BasicBlock::Create(iBuilder->getContext(), "do_" + kernels[i]->getName(), main, 0));
    302             finalSegmentBlocks.push_back(BasicBlock::Create(iBuilder->getContext(), "finish_" + kernels[i]->getName(), main, 0));
    303         }
    304     }
    305     loopBodyBlocks.push_back(segmentLoop); // If the last kernel does not terminate, loop back.
    306     finalSegmentBlocks.push_back(exitBlock); // If the last kernel does terminate, we're done.
     290   
     291    ProducerTable producerTable = createProducerTable(kernels);
     292   
     293    // ProducerPos[k][i] will hold the producedItemCount of the i^th output stream
     294    // set of the k^th kernel.  These values will be loaded immediately after the
     295    // doSegment and finalSegment calls for kernel k and later used as the
     296    // producer position arguments for later doSegment/finalSegment calls.
     297   
     298    std::vector<std::vector<Value *>> ProducerPos;
    307299   
    308300    iBuilder->CreateBr(segmentLoop);
    309     Constant * segBlocks = iBuilder->getSize(codegen::SegmentSize * iBuilder->getStride() / iBuilder->getBitBlockWidth());
    310     for (unsigned i = 0; i < kernels.size(); i++) {
    311         iBuilder->SetInsertPoint(loopBodyBlocks[i]);
    312         //Value * segBlocks = getSegmentBlocks(bufferMap, kernels[i]);
    313         Value * segNo = kernels[i]->acquireLogicalSegmentNo(kernels[i]->getInstance());
    314         kernels[i]->createDoSegmentCall(kernels[i]->getInstance(), segBlocks);
    315         if (kernels[i]->hasNoTerminateAttribute()) {
    316             kernels[i]->releaseLogicalSegmentNo(kernels[i]->getInstance(), iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
    317             if (i == kernels.size() - 1) {
    318                 iBuilder->CreateBr(segmentLoop);
    319             }
    320         }
    321         else {
    322             Value * terminated = kernels[i]->getTerminationSignal(kernels[i]->getInstance());
    323             kernels[i]->releaseLogicalSegmentNo(kernels[i]->getInstance(), iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
    324             iBuilder->CreateCondBr(terminated, finalSegmentBlocks[i+1], loopBodyBlocks[i+1]);
    325         }
    326         if (finalSegmentBlocks[i] != nullptr) {
    327             iBuilder->SetInsertPoint(finalSegmentBlocks[i]);
    328             Value * segNo = kernels[i]->acquireLogicalSegmentNo(kernels[i]->getInstance());
    329             kernels[i]->createFinalSegmentCall(kernels[i]->getInstance(), segBlocks);
    330             kernels[i]->releaseLogicalSegmentNo(kernels[i]->getInstance(), iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
    331             if (finalSegmentBlocks[i] != finalSegmentBlocks[i+1]) {
    332                 iBuilder->CreateBr(finalSegmentBlocks[i+1]);
    333             }
    334         }
    335     }
     301    iBuilder->SetInsertPoint(segmentLoop);
     302
     303    Value * terminationFound = ConstantInt::getNullValue(iBuilder->getInt1Ty());
     304    for (unsigned k = 0; k < kernels.size(); k++) {
     305        Value * instance = kernels[k]->getInstance();
     306        std::vector<Value *> doSegmentArgs = {instance, terminationFound};
     307        for (unsigned j = 0; j < kernels[k]->getStreamInputs().size(); j++) {
     308            unsigned producerKernel, outputIndex;
     309            std::tie(producerKernel, outputIndex) = producerTable[k][j];
     310            doSegmentArgs.push_back(ProducerPos[producerKernel][outputIndex]);
     311        }
     312        kernels[k]->createDoSegmentCall(doSegmentArgs);
     313        if (! (kernels[k]->hasNoTerminateAttribute())) {
     314            Value * terminated = kernels[k]->getTerminationSignal(instance);
     315            terminationFound = iBuilder->CreateOr(terminationFound, terminated);
     316        }
     317        std::vector<Value *> produced;
     318        for (unsigned i = 0; i < kernels[k]->getStreamOutputs().size(); i++) {
     319            produced.push_back(kernels[k]->getProducedItemCount(instance, kernels[k]->getStreamOutputs()[i].name));
     320        }
     321        ProducerPos.push_back(produced);
     322        Value * segNo = kernels[k]->acquireLogicalSegmentNo(instance);
     323        kernels[k]->releaseLogicalSegmentNo(instance, iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
     324    }
     325    iBuilder->CreateCondBr(terminationFound, exitBlock, segmentLoop);
    336326    iBuilder->SetInsertPoint(exitBlock);
    337327}
     328
     329   
Note: See TracChangeset for help on using the changeset viewer.