Changeset 5276


Ignore:
Timestamp:
Jan 25, 2017, 11:01:55 AM (10 months ago)
Author:
cameron
Message:

Elimination of StreamSetStructs wrapping StreamSetBuffers

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5267 r5276  
    7272             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
    7373        }
    74         mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].name + structPtrSuffix});
     74        mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamBufferPointerType(), mStreamSetInputs[i].name + bufferPtrSuffix});
    7575        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, streamSetNo);
    7676        addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + processedItemCountSuffix);
     
    7878    }
    7979    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    80         mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].name + structPtrSuffix});
     80        mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamBufferPointerType(), mStreamSetOutputs[i].name + bufferPtrSuffix});
    8181        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, streamSetNo);
    8282        addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + producedItemCountSuffix);
     
    198198    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_doFinalBlock", doSegmentFunction, 0);
    199199    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_segmentDone", doSegmentFunction, 0);
    200     BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_finalExit", doSegmentFunction, 0);
    201200    Type * const size_ty = iBuilder->getSizeTy();
    202201    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
     
    272271   
    273272    iBuilder->SetInsertPoint(segmentDone);
    274 //#ifndef NDEBUG
    275 //    iBuilder->CallPrintInt(mKernelName + "_processed", processed);
    276 //#endif
    277     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    278         Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
    279         Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    280         Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
    281         iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
    282     }
    283     iBuilder->CreateBr(finalExit);
    284     iBuilder->SetInsertPoint(finalExit);
    285273
    286274    iBuilder->CreateRetVoid();
     
    378366}
    379367
    380 Value * KernelBuilder::getStreamSetStructPtr(Value * self, const std::string & name) const {
    381     return getScalarField(self, name + structPtrSuffix);
     368Value * KernelBuilder::getStreamSetBufferPtr(Value * self, const std::string & name) const {
     369    return getScalarField(self, name + bufferPtrSuffix);
    382370}
    383371
     
    392380
    393381Value * KernelBuilder::getStreamSetPtr(Value * self, const std::string & name, Value * blockNo) const {
    394     return getStreamSetBuffer(name)->getStreamSetPtr(getStreamSetStructPtr(self, name), blockNo);
     382    return getStreamSetBuffer(name)->getStreamSetPtr(getStreamSetBufferPtr(self, name), blockNo);
    395383}
    396384
    397385Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index) const {
    398     return getStreamSetBuffer(name)->getStream(getStreamSetStructPtr(self, name), blockNo, index);
     386    return getStreamSetBuffer(name)->getStream(getStreamSetBufferPtr(self, name), blockNo, index);
    399387}
    400388
    401389Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index1, Value * index2) const {
    402390    assert (index1->getType() == index2->getType());
    403     return getStreamSetBuffer(name)->getStream(getStreamSetStructPtr(self, name), blockNo, index1, index2);
     391    return getStreamSetBuffer(name)->getStream(getStreamSetBufferPtr(self, name), blockNo, index1, index2);
    404392}
    405393
    406394Value * KernelBuilder::getStreamView(Value * self, const std::string & name, Value * blockNo, Value * index) const {
    407     return getStreamSetBuffer(name)->getStreamView(getStreamSetStructPtr(self, name), blockNo, index);
     395    return getStreamSetBuffer(name)->getStreamView(getStreamSetBufferPtr(self, name), blockNo, index);
    408396}
    409397
    410398Value * KernelBuilder::getStreamView(llvm::Type * type, Value * self, const std::string & name, Value * blockNo, Value * index) const {
    411     return getStreamSetBuffer(name)->getStreamView(type, getStreamSetStructPtr(self, name), blockNo, index);
     399    return getStreamSetBuffer(name)->getStreamView(type, getStreamSetBufferPtr(self, name), blockNo, index);
    412400}
    413401
     
    423411    }
    424412    for (auto b : mStreamSetInputBuffers) {
    425         init_args.push_back(b->getStreamSetStructPtr());
     413        init_args.push_back(b->getStreamSetBasePtr());
    426414    }
    427415    for (auto b : mStreamSetOutputBuffers) {
    428         init_args.push_back(b->getStreamSetStructPtr());
     416        init_args.push_back(b->getStreamSetBasePtr());
    429417    }
    430418    std::string initFnName = mKernelName + init_suffix;
     
    436424}
    437425
    438 Function * KernelBuilder::generateThreadFunction(const std::string & name) const {
    439     if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    440         llvm::report_fatal_error("Cannot generate thread function before calling prepareKernel()");
    441     }
    442     Module * m = iBuilder->getModule();
    443     Type * const voidTy = iBuilder->getVoidTy();
    444     PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
    445     PointerType * const int8PtrTy = iBuilder->getInt8PtrTy();
    446     IntegerType * const int1ty = iBuilder->getInt1Ty();
    447    
    448     Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
    449     threadFunc->setCallingConv(CallingConv::C);
    450     Function::arg_iterator args = threadFunc->arg_begin();
    451    
    452     Value * const arg = &*(args++);
    453     arg->setName("args");
    454    
    455     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
    456    
    457     Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
    458    
    459     std::vector<Value *> inbufProducerPtrs;
    460     std::vector<Value *> inbufConsumerPtrs;
    461     std::vector<Value *> outbufProducerPtrs;
    462     std::vector<Value *> outbufConsumerPtrs;   
    463     std::vector<Value *> endSignalPtrs;
    464    
    465     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    466         Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
    467         inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
    468         inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(ssStructPtr));
    469         endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
    470     }
    471     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    472         Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    473         outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr));
    474         outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
    475     }
    476    
    477     const unsigned segmentBlocks = codegen::SegmentSize;
    478     const unsigned bufferSegments = codegen::BufferSegments;
    479     const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
    480     Type * const size_ty = iBuilder->getSizeTy();
    481    
    482     Value * segSize = ConstantInt::get(size_ty, segmentSize);
    483     Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
    484    
    485     BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
    486     BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
    487    
    488     BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
    489     BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
    490     BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
    491    
    492     iBuilder->CreateBr(outputCheckBlock);
    493    
    494     iBuilder->SetInsertPoint(outputCheckBlock);
    495    
    496     Value * waitCondTest = ConstantInt::get(int1ty, 1);   
    497     for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    498         LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(outbufProducerPtrs[i]);
    499         // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
    500         LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(outbufConsumerPtrs[i]);
    501         // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
    502         waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
    503     }
    504    
    505     iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock);
    506    
    507     iBuilder->SetInsertPoint(inputCheckBlock);
    508    
    509     Value * requiredSize = segSize;
    510     if (mLookAheadPositions > 0) {
    511         requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
    512     }
    513     waitCondTest = ConstantInt::get(int1ty, 1);
    514     for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
    515         LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
    516         // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
    517         LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(inbufConsumerPtrs[i]);
    518         // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
    519         waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
    520     }
    521    
    522     iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
    523    
    524     iBuilder->SetInsertPoint(endSignalCheckBlock);
    525    
    526     LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]);
    527     for (unsigned i = 1; i < endSignalPtrs.size(); i++){
    528         LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]);
    529         iBuilder->CreateAnd(endSignal, endSignal_next);
    530     }
    531    
    532     iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
    533    
    534     iBuilder->SetInsertPoint(doSegmentBlock);
    535    
    536     // needs positions
    537     createDoSegmentCall({self, ConstantInt::getNullValue(iBuilder->getInt1Ty())});
    538    
    539     for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
    540         Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
    541         iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
    542     }
    543    
    544     for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    545         Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
    546         iBuilder->CreateAtomicStoreRelease(produced, outbufProducerPtrs[i]);
    547     }
    548    
    549     Value * earlyEndSignal = getTerminationSignal(self);
    550     if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
    551         BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
    552         iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
    553        
    554         iBuilder->SetInsertPoint(earlyEndBlock);
    555         for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    556             Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    557             mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
    558         }       
    559     }
    560     iBuilder->CreateBr(outputCheckBlock);
    561    
    562     iBuilder->SetInsertPoint(endBlock);
    563     LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
    564     LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
    565     Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
    566    
    567         // needs positions
    568     createDoSegmentCall({self, ConstantInt::getAllOnesValue(iBuilder->getInt1Ty())});
    569    
    570    
    571     for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
    572         Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
    573         iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
    574     }
    575     for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
    576         iBuilder->CreateAtomicStoreRelease(producerPos, outbufProducerPtrs[i]);
    577     }
    578    
    579     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    580         Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    581         mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
    582     }
    583    
    584     iBuilder->CreatePThreadExitCall(Constant::getNullValue(voidPtrTy));
    585     iBuilder->CreateRetVoid();
    586    
    587     return threadFunc;
    588    
    589 }
    590 
    591426KernelBuilder::~KernelBuilder() {
    592427}
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5267 r5276  
    2525const std::string producedItemCountSuffix = "_producedItemCount";
    2626const std::string terminationSignal = "terminationSignal";
    27 const std::string structPtrSuffix = "_structPtr";
     27const std::string bufferPtrSuffix = "_bufferPtr";
    2828const std::string blkMaskSuffix = "_blkMask";
    2929
     
    4343   
    4444    void createInstance() override;
    45 
    46     llvm::Function * generateThreadFunction(const std::string & name) const;
    4745
    4846    llvm::Value * getBlockNo(llvm::Value * self) const;
     
    167165    llvm::Value * getScalarFieldPtr(llvm::Value * self, const std::string & name) const;
    168166
    169     llvm::Value * getStreamSetStructPtr(llvm::Value * self, const std::string & name) const;
     167    llvm::Value * getStreamSetBufferPtr(llvm::Value * self, const std::string & name) const;
    170168
    171169    llvm::Value * getStreamSetPtr(llvm::Value * self, const std::string & name, llvm::Value * blockNo) const;
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.cpp

    r5267 r5276  
    4646    setTerminationSignal(self);
    4747    iBuilder->CreateBr(mmapSourceExit);
     48   
    4849    iBuilder->SetInsertPoint(mmapSourceExit);
    49     Value * ssStructPtr = getStreamSetStructPtr(self, "sourceBuffer");
    50     Value * producerPosPtr = mStreamSetOutputBuffers[0]->getProducerPosPtr(ssStructPtr);
    51     iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
    52    
    5350    iBuilder->CreateRetVoid();
    5451    iBuilder->restoreIP(savePoint);
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5267 r5276  
    232232    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
    233233    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
    234     // Final Block arguments: self, remaining, then the standard DoBlock args.
    235234    Function::arg_iterator args = finalBlockFunction->arg_begin();
    236235    Value * self = &*(args++);
    237     /* Skip "remaining" arg */ args++;
    238236    std::vector<Value *> doBlockArgs = {self};
    239     while (args != finalBlockFunction->arg_end()){
    240         doBlockArgs.push_back(&*args++);
    241     }
    242     Value * i16UnitsGenerated = getProducedItemCount(self, "i16Stream"); // units generated to buffer
    243237    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
    244     i16UnitsGenerated = getProducedItemCount(self, "i16Stream"); // units generated to buffer
    245     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    246         Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
    247         Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
    248         iBuilder->CreateAtomicStoreRelease(i16UnitsGenerated, producerPosPtr);
    249     }
    250238    iBuilder->CreateRetVoid();
    251239    iBuilder->restoreIP(savePoint);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5275 r5276  
    77#include <toolchain.h>
    88#include <kernels/kernel.h>
     9#include <kernels/streamset.h>
    910#include <llvm/IR/Module.h>
    1011#include <unordered_map>
     
    6465}
    6566
     67std::vector<Value *> getCopyBackPositions(const KernelBuilder * kernel, Value * instance) {
     68    std::vector<Value *> positions;
     69    auto outputSets = kernel->getStreamSetOutputBuffers();
     70    for (unsigned i = 0; i < outputSets.size(); i++) {
     71        if (isa<LinearCopybackBuffer>(outputSets[i])) {
     72            positions.push_back(kernel->getProducedItemCount(instance, kernel->getStreamOutputs()[i].name));
     73        }
     74    }
     75    return positions;
     76}
     77
     78void createCopyBackCode(const StreamSetBuffer *, Value * startPosition, Value * finalPosition) {
     79    //BasicBlock * doCopyBack = BasicBlock::Create(iBuilder->getContext(), kernels[k]->getName() + "copyBack" +std::to_string(j), threadFunc, 0);
     80   
     81   
     82}
     83
    6684
    6785Function * generateSegmentParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels, Type * sharedStructType, ProducerTable & producerTable, int id) {
     
    148166            doSegmentArgs.push_back(ProducerPos[producerKernel][outputIndex]);
    149167        }
     168        std::vector<Value *> copyBackStartPosition = getCopyBackPositions(kernels[k], instancePtrs[k]);
    150169        kernels[k]->createDoSegmentCall(doSegmentArgs);
    151170        std::vector<Value *> produced;
     171        unsigned copyBackIndex = 0;
    152172        for (unsigned i = 0; i < kernels[k]->getStreamOutputs().size(); i++) {
    153173            produced.push_back(kernels[k]->getProducedItemCount(instancePtrs[k], kernels[k]->getStreamOutputs()[i].name));
     174            if (isa<LinearCopybackBuffer>(kernels[k]->getStreamSetOutputBuffers()[i])) {
     175                createCopyBackCode(kernels[k]->getStreamSetOutputBuffers()[i], copyBackStartPosition[copyBackIndex], produced[i]);
     176                copyBackIndex++;
     177            }
    154178        }
    155179        ProducerPos.push_back(produced);
     
    245269
    246270void generatePipelineParallel(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels) {
    247    
    248     IntegerType * pthreadTy = iBuilder->getSizeTy();
    249     PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
    250     PointerType * const int8PtrTy = iBuilder->getInt8PtrTy();
    251    
    252     ArrayType * const pthreadsTy = ArrayType::get(pthreadTy, kernels.size());
    253    
    254     for (auto k : kernels) k->createInstance();
    255    
    256     AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
    257     std::vector<Value *> pthreadsPtrs;
    258     for (unsigned i = 0; i < kernels.size(); i++) {
    259         pthreadsPtrs.push_back(iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(i)}));
    260     }
    261     Value * nullVal = Constant::getNullValue(voidPtrTy);
    262     AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
    263    
    264     std::vector<Function *> kernel_functions;
    265     const auto ip = iBuilder->saveIP();
    266     for (unsigned i = 0; i < kernels.size(); i++) {
    267         kernel_functions.push_back(kernels[i]->generateThreadFunction("k_"+std::to_string(i)));
    268     }
    269     iBuilder->restoreIP(ip);
    270    
    271     for (unsigned i = 0; i < kernels.size(); i++) {
    272         iBuilder->CreatePThreadCreateCall(pthreadsPtrs[i], nullVal, kernel_functions[i], iBuilder->CreateBitCast(kernels[i]->getInstance(), int8PtrTy));
    273     }
    274    
    275     std::vector<Value *> threadIDs;
    276     for (unsigned i = 0; i < kernels.size(); i++) {
    277         threadIDs.push_back(iBuilder->CreateLoad(pthreadsPtrs[i]));
    278     }
    279    
    280     for (unsigned i = 0; i < kernels.size(); i++) {
    281         iBuilder->CreatePThreadJoinCall(threadIDs[i], status);
    282     }
     271    llvm::report_fatal_error("Pipeline parallelism no longer supported!");
    283272}
    284273
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5267 r5276  
    100100   
    101101    const unsigned packAlign = iBuilder->getBitBlockWidth()/8;
     102
    102103    Function::arg_iterator args = doSegmentFunction->arg_begin();
    103104    Value * self = &*(args++);
    104     Value * blocksToDo = &*(args);
    105     Value * streamStructPtr = getStreamSetStructPtr(self, "sourceStream");
    106 
    107     LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(mStreamSetInputBuffers[0]->getProducerPosPtr(streamStructPtr));
     105    Value * doFinal = &*(args++);
     106    Value * producerPos = &*(args++);
    108107    Value * processed = getProcessedItemCount(self, "sourceStream");
    109108    Value * itemsAvail = iBuilder->CreateSub(producerPos, processed);
     
    266265    Value * totalProduced = iBuilder->CreateAdd(iBuilder->CreateMul(iBuilder->CreateUDiv(processed, Const3), Const4), iBuilder->CreateURem(processed, Const3));
    267266    setProducedItemCount(self, "expandedStream", totalProduced);
    268     Value * ssStructPtr = getStreamSetStructPtr(self, "expandedStream");
    269 
    270     Value * producerPosPtr = mStreamSetOutputBuffers[0]->getProducerPosPtr(ssStructPtr);
    271 
    272     iBuilder->CreateAtomicStoreRelease(totalProduced, producerPosPtr);
    273267   
    274268    iBuilder->CreateCondBr(inFinalSegment, setTermination, expand3_4_exit);
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5267 r5276  
    3030    /* unused Value * doFinal = &*(args++);*/ args++;
    3131    Value * producerPos = &*(args++);
    32     Value * streamStructPtr = getStreamSetStructPtr(self, "codeUnitBuffer");
    3332    Value * processed = getProcessedItemCount(self, "codeUnitBuffer");
    3433    Value * itemsToDo = iBuilder->CreateSub(producerPos, processed);
     
    4241    setProcessedItemCount(self, "codeUnitBuffer", processed);
    4342    setScalarField(self, blockNoScalar, iBuilder->CreateUDiv(processed, blockItems));
    44     mStreamSetInputBuffers[0]->setConsumerPos(streamStructPtr, processed);
    4543
    4644    iBuilder->CreateRetVoid();
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5260 r5276  
    2323using namespace IDISA;
    2424
    25 enum SS_struct_index {iProducer_pos = 0, iConsumer_pos = 1, iEnd_of_input = 2, iBuffer_ptr = 3};
    26 
    27 Value * StreamSetBuffer::getProducerPosPtr(Value * self) const {
    28     return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)});
    29 }
    30 
    31 void StreamSetBuffer::setProducerPos(Value * self, Value * pos) const {
    32     iBuilder->CreateStore(pos, getProducerPosPtr(self));
    33 }
    34 
    35 Value * StreamSetBuffer::getConsumerPosPtr(Value * self) const {
    36     return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)});
    37 }
    38 
    39 void StreamSetBuffer::setConsumerPos(Value * self, Value * pos) const {
    40     iBuilder->CreateStore(pos, getConsumerPosPtr(self));
    41 }
    42 
    43 Value * StreamSetBuffer::getEndOfInputPtr(Value * self) const {
    44     return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)});
    45 }
    46 
    47 void StreamSetBuffer::setEndOfInput(Value * self) const {
    48     iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), getEndOfInputPtr(self));
    49 }
    50 
    5125Type * StreamSetBuffer::resolveStreamTypes(Type * type) {
    5226    if (auto ty = dyn_cast<ArrayType>(type)) {
     
    6438
    6539void StreamSetBuffer::allocateBuffer() {
    66     Type * const sizeTy = iBuilder->getSizeTy();
    67     Type * const int1ty = iBuilder->getInt1Ty();
    6840    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(mStreamSetType, iBuilder->getSize(mBufferBlocks));
    69     mStreamSetStructPtr = iBuilder->CreateCacheAlignedAlloca(mStreamSetStructType);
    70     iBuilder->CreateStore(ConstantInt::get(sizeTy, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
    71     iBuilder->CreateStore(ConstantInt::get(sizeTy, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
    72     iBuilder->CreateStore(ConstantInt::get(int1ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));
    73     iBuilder->CreateStore(mStreamSetBufferPtr, iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)}));
    7441}
    7542
     
    9562// For a single block buffer, the block pointer is always the buffer base pointer.
    9663Value * SingleBlockBuffer::getStreamSetPtr(Value * self, Value *) const {
    97     return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)}), "sb");
     64    return self;
    9865}
    9966
     
    10168void ExternalFileBuffer::setStreamSetBuffer(Value * ptr, Value * fileSize) {
    10269   
    103     Type * const size_ty = iBuilder->getSizeTy();
    104     Type * const int1ty = iBuilder->getInt1Ty();
    105    
    10670    PointerType * t = getStreamBufferPointerType();   
    10771    mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, t);
    108    
    109     mStreamSetStructPtr = iBuilder->CreateCacheAlignedAlloca(mStreamSetStructType);
    110     iBuilder->CreateStore(fileSize, iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
    111     iBuilder->CreateStore(ConstantInt::get(size_ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
    112     iBuilder->CreateStore(ConstantInt::get(int1ty, 1), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));
    113     iBuilder->CreateStore(mStreamSetBufferPtr, iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)}));
    11472}
    11573
    11674void ExternalFileBuffer::setEmptyBuffer(Value * ptr) {
    11775   
    118     Type * const size_ty = iBuilder->getSizeTy();
    119     Type * const int1ty = iBuilder->getInt1Ty();
    120    
    12176    PointerType * t = getStreamBufferPointerType();   
    12277    mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, t);
    123    
    124     mStreamSetStructPtr = iBuilder->CreateCacheAlignedAlloca(mStreamSetStructType);
    125     iBuilder->CreateStore(ConstantInt::get(size_ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
    126     iBuilder->CreateStore(ConstantInt::get(size_ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
    127     iBuilder->CreateStore(ConstantInt::get(int1ty,0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));
    128     iBuilder->CreateStore(mStreamSetBufferPtr, iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)}));
    12978}
    13079
     
    13483
    13584Value * ExternalFileBuffer::getStreamSetPtr(Value * self, Value * blockNo) const {
    136     Value * handle = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)}, "ef");
    137     Value * bufPtr = iBuilder->CreateLoad(handle);
    138     return iBuilder->CreateGEP(bufPtr, blockNo);
     85    return iBuilder->CreateGEP(self, blockNo);
    13986}
    14087
     
    14491    assert (blockNo->getType()->isIntegerTy());
    14592
    146     Value * handle = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)}, "cb");
    147     Value * bufPtr = iBuilder->CreateLoad(handle);
    14893    Value * offset = nullptr;
    14994    if (mBufferBlocks == 1) {
     
    15499        offset = iBuilder->CreateURem(blockNo, ConstantInt::get(blockNo->getType(), mBufferBlocks));
    155100    }
    156     return iBuilder->CreateGEP(bufPtr, offset);
     101    return iBuilder->CreateGEP(self, offset);
    157102}
    158103
     
    160105
    161106Value * LinearCopybackBuffer::getStreamSetPtr(Value * self, Value * blockNo) const {
    162     Value * consumerPos_ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}, "lcb.c");
    163     Value * consumerPos = iBuilder->CreateLoad(consumerPos_ptr);
    164     Value * consumerBlock = iBuilder->CreateUDiv(consumerPos, iBuilder->getSize(iBuilder->getStride()));
    165     consumerBlock = iBuilder->CreateZExtOrTrunc(consumerBlock, blockNo->getType());
    166     Value * handle = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)}, "lcb.p");
    167     Value * bufPtr = iBuilder->CreateLoad(handle);
    168     return iBuilder->CreateGEP(bufPtr, iBuilder->CreateSub(blockNo, consumerBlock));
     107    Value * offset = nullptr;
     108    if (mBufferBlocks == 1) {
     109        offset = ConstantInt::getNullValue(iBuilder->getSizeTy());
     110    } else if ((mBufferBlocks & (mBufferBlocks - 1)) == 0) { // is power of 2
     111        offset = iBuilder->CreateAnd(blockNo, ConstantInt::get(blockNo->getType(), mBufferBlocks - 1));
     112    } else {
     113        offset = iBuilder->CreateURem(blockNo, ConstantInt::get(blockNo->getType(), mBufferBlocks));
     114    }
     115    return iBuilder->CreateGEP(self, offset);
    169116}
    170117
    171 void LinearCopybackBuffer::setConsumerPos(Value * self, Value * newConsumerPos) const {
    172     Type * const i8 = iBuilder->getInt8Ty();
    173     Type * const i8_ptr = i8->getPointerTo(mAddrSpace);
    174     IntegerType * const sizeTy = iBuilder->getSizeTy();
    175 
    176     Module * const M = iBuilder->getModule();
    177 
    178     Function * const current = iBuilder->GetInsertBlock()->getParent();
    179     BasicBlock * const copyBackBody = BasicBlock::Create(M->getContext(), "copy_back", current, 0);
    180     BasicBlock * const setConsumerPosExit = BasicBlock::Create(M->getContext(), "setConsumerPos_done", current, 0);
    181     Constant * const blockWidth = ConstantInt::get(sizeTy, iBuilder->getStride());
    182 
    183     Constant * const one = ConstantInt::get(sizeTy, 1);
    184 
    185     Value * const consumerPosPtr = getConsumerPosPtr(self);
    186     Value * const consumerPos = iBuilder->CreateLoad(consumerPosPtr);
    187 
    188     // Ensure that the new consumer position is no less than the current position.
    189     newConsumerPos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(newConsumerPos, consumerPos), consumerPos, newConsumerPos);
    190     Value * producerPos = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
    191 
    192     // Ensure that the new consumer position is no greater than the current producer position.
    193     Value * new_pos_lt_producer_pos = iBuilder->CreateICmpULT(newConsumerPos, producerPos);
    194     newConsumerPos = iBuilder->CreateSelect(new_pos_lt_producer_pos, newConsumerPos, producerPos);
    195 
    196     // Now, the new_consumer_pos is at most = to the producer_pos; if =, we're done.
    197     iBuilder->CreateCondBr(new_pos_lt_producer_pos, copyBackBody, setConsumerPosExit);
    198     iBuilder->SetInsertPoint(copyBackBody);
    199    
    200     Value * new_consumer_block = iBuilder->CreateUDiv(newConsumerPos, blockWidth);
    201     Value * lastProducerBlock = iBuilder->CreateUDiv(iBuilder->CreateSub(producerPos, one), blockWidth);
    202     Value * copyBlocks = iBuilder->CreateAdd(iBuilder->CreateSub(lastProducerBlock, new_consumer_block), one);
    203 
    204     DataLayout dl(iBuilder->getModule());
    205 
    206     Constant * blockBytes = ConstantInt::get(sizeTy, dl.getTypeAllocSize(mStreamSetType) * iBuilder->getStride());
    207 
    208     Value * copyLength = iBuilder->CreateMul(copyBlocks, blockBytes);
    209 
    210     // Must copy back one full block for each of the streams in the stream set.
    211     Value * handle = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
    212     Value * bufferPtr = iBuilder->CreateLoad(handle);
    213     Value * const consumerBlock = iBuilder->CreateUDiv(consumerPos, blockWidth);
    214     Value * copyFrom = iBuilder->CreateGEP(bufferPtr, iBuilder->CreateSub(new_consumer_block, consumerBlock));
    215     unsigned alignment = iBuilder->getBitBlockWidth() / 8;
    216     iBuilder->CreateMemMove(iBuilder->CreateBitCast(bufferPtr, i8_ptr), iBuilder->CreateBitCast(copyFrom, i8_ptr), copyLength, alignment);
    217     iBuilder->CreateBr(setConsumerPosExit);
    218     // Copy back done, store the new consumer position.
    219     iBuilder->SetInsertPoint(setConsumerPosExit);
    220 
    221     iBuilder->CreateStore(newConsumerPos, consumerPosPtr);
    222 }
    223118
    224119// Expandable Buffer
     
    278173, mBufferBlocks(blocks)
    279174, mAddrSpace(AddressSpace)
    280 , mStreamSetBufferPtr(nullptr)
    281 , mStreamSetStructPtr(nullptr)
    282 , mStreamSetStructType(StructType::get(b->getContext(),
    283                         {{b->getSizeTy(),
    284                           b->getSizeTy(),
    285                           b->getInt1Ty(),
    286                           PointerType::get(mStreamSetType, AddressSpace)}})) {
     175, mStreamSetBufferPtr(nullptr) {
    287176
    288177}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5260 r5276  
    1515namespace parabix {
    1616   
    17 // Stream Set Structs hold information about the current state of a stream set buffer.
    18 
    19 llvm::Value * getProducerPosPtr(IDISA::IDISA_Builder * b, llvm::Value * self);
    20 llvm::Value * getConsumerPosPtr(IDISA::IDISA_Builder * b, llvm::Value * self);
    21 llvm::Value * getEndOfInputPtr(IDISA::IDISA_Builder * b, llvm::Value * self);
    22 llvm::Value * getStreamSetBufferPtr(IDISA::IDISA_Builder * b, llvm::Value * self);
    23 
    2417class StreamSetBuffer {
    2518    friend class kernel::KernelBuilder;
     
    4134    }
    4235
    43     llvm::PointerType * getStreamSetStructPointerType() const {
    44         return mStreamSetStructType->getPointerTo();
    45     }
    46 
    4736    size_t getBufferSize() const { return mBufferBlocks; }
    4837
    4938    llvm::Value * getStreamSetBasePtr() const { return mStreamSetBufferPtr; }
    50 
    51     llvm::Value * getStreamSetStructPtr() const { return mStreamSetStructPtr; }
    5239
    5340    virtual void allocateBuffer();
     
    6148    virtual llvm::Value * getStreamView(llvm::Type * type, llvm::Value * self, llvm::Value * blockNo, llvm::Value * index) const;
    6249
    63     llvm::Value * getProducerPosPtr(llvm::Value * self) const;
    64 
    65     void setProducerPos(llvm::Value * self, llvm::Value * pos) const;
    66 
    67     llvm::Value * getConsumerPosPtr(llvm::Value * self) const;
    68 
    69     virtual void setConsumerPos(llvm::Value * self, llvm::Value * pos) const;
    70 
    71     llvm::Value * getEndOfInputPtr(llvm::Value * self) const;
    72 
    73     void setEndOfInput(llvm::Value * self) const;
    74    
    7550    llvm::Type * resolveStreamTypes(llvm::Type * type);
    7651   
     
    8964    const int                       mAddrSpace;
    9065    llvm::Value *                   mStreamSetBufferPtr;
    91     llvm::Value *                   mStreamSetStructPtr;
    92     llvm::Type * const              mStreamSetStructType;
    9366};   
    9467
     
    147120    // Reset the buffer to contain data starting at the base block of new_consumer_pos,
    148121    // copying back any data beyond that position.
    149     void setConsumerPos(llvm::Value * self, llvm::Value * newConsumerPos) const override;
     122    //void setConsumerPos(llvm::Value * self, llvm::Value * newConsumerPos) const override;
    150123
    151124protected:
Note: See TracChangeset for help on using the changeset viewer.