Ignore:
Timestamp:
Jan 25, 2017, 11:01:55 AM (3 years ago)
Author:
cameron
Message:

Elimination of StreamSetStructs wrapping StreamSetBuffers

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5267 r5276  
    7272             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
    7373        }
    74         mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].name + structPtrSuffix});
     74        mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamBufferPointerType(), mStreamSetInputs[i].name + bufferPtrSuffix});
    7575        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, streamSetNo);
    7676        addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + processedItemCountSuffix);
     
    7878    }
    7979    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    80         mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].name + structPtrSuffix});
     80        mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamBufferPointerType(), mStreamSetOutputs[i].name + bufferPtrSuffix});
    8181        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, streamSetNo);
    8282        addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + producedItemCountSuffix);
     
    198198    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_doFinalBlock", doSegmentFunction, 0);
    199199    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_segmentDone", doSegmentFunction, 0);
    200     BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_finalExit", doSegmentFunction, 0);
    201200    Type * const size_ty = iBuilder->getSizeTy();
    202201    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
     
    272271   
    273272    iBuilder->SetInsertPoint(segmentDone);
    274 //#ifndef NDEBUG
    275 //    iBuilder->CallPrintInt(mKernelName + "_processed", processed);
    276 //#endif
    277     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    278         Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
    279         Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    280         Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
    281         iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
    282     }
    283     iBuilder->CreateBr(finalExit);
    284     iBuilder->SetInsertPoint(finalExit);
    285273
    286274    iBuilder->CreateRetVoid();
     
    378366}
    379367
    380 Value * KernelBuilder::getStreamSetStructPtr(Value * self, const std::string & name) const {
    381     return getScalarField(self, name + structPtrSuffix);
     368Value * KernelBuilder::getStreamSetBufferPtr(Value * self, const std::string & name) const {
     369    return getScalarField(self, name + bufferPtrSuffix);
    382370}
    383371
     
    392380
    393381Value * KernelBuilder::getStreamSetPtr(Value * self, const std::string & name, Value * blockNo) const {
    394     return getStreamSetBuffer(name)->getStreamSetPtr(getStreamSetStructPtr(self, name), blockNo);
     382    return getStreamSetBuffer(name)->getStreamSetPtr(getStreamSetBufferPtr(self, name), blockNo);
    395383}
    396384
    397385Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index) const {
    398     return getStreamSetBuffer(name)->getStream(getStreamSetStructPtr(self, name), blockNo, index);
     386    return getStreamSetBuffer(name)->getStream(getStreamSetBufferPtr(self, name), blockNo, index);
    399387}
    400388
    401389Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index1, Value * index2) const {
    402390    assert (index1->getType() == index2->getType());
    403     return getStreamSetBuffer(name)->getStream(getStreamSetStructPtr(self, name), blockNo, index1, index2);
     391    return getStreamSetBuffer(name)->getStream(getStreamSetBufferPtr(self, name), blockNo, index1, index2);
    404392}
    405393
    406394Value * KernelBuilder::getStreamView(Value * self, const std::string & name, Value * blockNo, Value * index) const {
    407     return getStreamSetBuffer(name)->getStreamView(getStreamSetStructPtr(self, name), blockNo, index);
     395    return getStreamSetBuffer(name)->getStreamView(getStreamSetBufferPtr(self, name), blockNo, index);
    408396}
    409397
    410398Value * KernelBuilder::getStreamView(llvm::Type * type, Value * self, const std::string & name, Value * blockNo, Value * index) const {
    411     return getStreamSetBuffer(name)->getStreamView(type, getStreamSetStructPtr(self, name), blockNo, index);
     399    return getStreamSetBuffer(name)->getStreamView(type, getStreamSetBufferPtr(self, name), blockNo, index);
    412400}
    413401
     
    423411    }
    424412    for (auto b : mStreamSetInputBuffers) {
    425         init_args.push_back(b->getStreamSetStructPtr());
     413        init_args.push_back(b->getStreamSetBasePtr());
    426414    }
    427415    for (auto b : mStreamSetOutputBuffers) {
    428         init_args.push_back(b->getStreamSetStructPtr());
     416        init_args.push_back(b->getStreamSetBasePtr());
    429417    }
    430418    std::string initFnName = mKernelName + init_suffix;
     
    436424}
    437425
    438 Function * KernelBuilder::generateThreadFunction(const std::string & name) const {
    439     if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    440         llvm::report_fatal_error("Cannot generate thread function before calling prepareKernel()");
    441     }
    442     Module * m = iBuilder->getModule();
    443     Type * const voidTy = iBuilder->getVoidTy();
    444     PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
    445     PointerType * const int8PtrTy = iBuilder->getInt8PtrTy();
    446     IntegerType * const int1ty = iBuilder->getInt1Ty();
    447    
    448     Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
    449     threadFunc->setCallingConv(CallingConv::C);
    450     Function::arg_iterator args = threadFunc->arg_begin();
    451    
    452     Value * const arg = &*(args++);
    453     arg->setName("args");
    454    
    455     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
    456    
    457     Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
    458    
    459     std::vector<Value *> inbufProducerPtrs;
    460     std::vector<Value *> inbufConsumerPtrs;
    461     std::vector<Value *> outbufProducerPtrs;
    462     std::vector<Value *> outbufConsumerPtrs;   
    463     std::vector<Value *> endSignalPtrs;
    464    
    465     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    466         Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
    467         inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
    468         inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(ssStructPtr));
    469         endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
    470     }
    471     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    472         Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    473         outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr));
    474         outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
    475     }
    476    
    477     const unsigned segmentBlocks = codegen::SegmentSize;
    478     const unsigned bufferSegments = codegen::BufferSegments;
    479     const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
    480     Type * const size_ty = iBuilder->getSizeTy();
    481    
    482     Value * segSize = ConstantInt::get(size_ty, segmentSize);
    483     Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
    484    
    485     BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
    486     BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
    487    
    488     BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
    489     BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
    490     BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
    491    
    492     iBuilder->CreateBr(outputCheckBlock);
    493    
    494     iBuilder->SetInsertPoint(outputCheckBlock);
    495    
    496     Value * waitCondTest = ConstantInt::get(int1ty, 1);   
    497     for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    498         LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(outbufProducerPtrs[i]);
    499         // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
    500         LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(outbufConsumerPtrs[i]);
    501         // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
    502         waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
    503     }
    504    
    505     iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock);
    506    
    507     iBuilder->SetInsertPoint(inputCheckBlock);
    508    
    509     Value * requiredSize = segSize;
    510     if (mLookAheadPositions > 0) {
    511         requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
    512     }
    513     waitCondTest = ConstantInt::get(int1ty, 1);
    514     for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
    515         LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
    516         // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
    517         LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(inbufConsumerPtrs[i]);
    518         // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
    519         waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
    520     }
    521    
    522     iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
    523    
    524     iBuilder->SetInsertPoint(endSignalCheckBlock);
    525    
    526     LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]);
    527     for (unsigned i = 1; i < endSignalPtrs.size(); i++){
    528         LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]);
    529         iBuilder->CreateAnd(endSignal, endSignal_next);
    530     }
    531    
    532     iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
    533    
    534     iBuilder->SetInsertPoint(doSegmentBlock);
    535    
    536     // needs positions
    537     createDoSegmentCall({self, ConstantInt::getNullValue(iBuilder->getInt1Ty())});
    538    
    539     for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
    540         Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
    541         iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
    542     }
    543    
    544     for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    545         Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
    546         iBuilder->CreateAtomicStoreRelease(produced, outbufProducerPtrs[i]);
    547     }
    548    
    549     Value * earlyEndSignal = getTerminationSignal(self);
    550     if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
    551         BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
    552         iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
    553        
    554         iBuilder->SetInsertPoint(earlyEndBlock);
    555         for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    556             Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    557             mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
    558         }       
    559     }
    560     iBuilder->CreateBr(outputCheckBlock);
    561    
    562     iBuilder->SetInsertPoint(endBlock);
    563     LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
    564     LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
    565     Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
    566    
    567         // needs positions
    568     createDoSegmentCall({self, ConstantInt::getAllOnesValue(iBuilder->getInt1Ty())});
    569    
    570    
    571     for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
    572         Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
    573         iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
    574     }
    575     for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    576         iBuilder->CreateAtomicStoreRelease(producerPos, outbufProducerPtrs[i]);
    577     }
    578    
    579     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    580         Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    581         mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
    582     }
    583    
    584     iBuilder->CreatePThreadExitCall(Constant::getNullValue(voidPtrTy));
    585     iBuilder->CreateRetVoid();
    586    
    587     return threadFunc;
    588    
    589 }
    590 
    591426KernelBuilder::~KernelBuilder() {
    592427}
Note: See TracChangeset for help on using the changeset viewer.