Changeset 5185 for icGREP


Ignore:
Timestamp:
Oct 8, 2016, 7:33:08 PM (3 years ago)
Author:
cameron
Message:

Linear buffers; stdout_kernel; cache-aligned alloca, restructured u8u16, clean-ups

Location:
icGREP/icgrep-devel/icgrep
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5180 r5185  
    108108
    109109add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp kernels/pipeline.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp ${PRECOMPILED_FILES})
    110 add_executable(u8u16 u8u16.cpp toolchain.cpp kernels/p2s_kernel.cpp kernels/pipeline.cpp kernels/deletion.cpp)
     110add_executable(u8u16 u8u16.cpp toolchain.cpp kernels/p2s_kernel.cpp kernels/pipeline.cpp kernels/deletion.cpp kernels/stdout_kernel.cpp)
    111111#add_executable(symtbl symboltable.cpp toolchain.cpp kernels/symboltablepipeline.cpp kernels/stdout_kernel.cpp grep_engine.cpp kernels/pipeline.cpp kernels/scanmatchgen.cpp ${PRECOMPILED_FILES})
    112112add_executable(wc wc.cpp toolchain.cpp kernels/pipeline.cpp)
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_builder.h

    r5140 r5185  
    2222public:
    2323
    24     IDISA_Builder(Module * m, Type * bitBlockType)
     24    IDISA_Builder(Module * m, Type * bitBlockType, unsigned CacheAlignment=64)
    2525    : IRBuilder<>(m->getContext())
    2626    , mMod(m)
     27    , mCacheLineAlignment(CacheAlignment)
    2728    , mBitBlockType(bitBlockType)
    2829    , mBitBlockWidth(bitBlockType->isIntegerTy() ? cast<IntegerType>(bitBlockType)->getIntegerBitWidth() : cast<VectorType>(bitBlockType)->getBitWidth())
     
    142143    inline llvm::Type * getSizeTy() {return Triple(llvm::sys::getProcessTriple()).isArch32Bit() ? getInt32Ty() : getInt64Ty();}
    143144   
     145    inline llvm::AllocaInst * CreateCacheAlignedAlloca(llvm::Type * Ty, llvm::Value * ArraySize = nullptr) {
     146        llvm::AllocaInst * instr = CreateAlloca(Ty, ArraySize);
     147        instr->setAlignment(mCacheLineAlignment);
     148        return instr;
     149    }
     150   
     151   
    144152protected:
    145153    Module * mMod;
     154    unsigned mCacheLineAlignment;
    146155    Type * mBitBlockType;
    147156    unsigned mBitBlockWidth;
     
    179188   
    180189
     190   
    181191}
    182192#endif // IDISA_BUILDER_H
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5174 r5185  
    5959    doBlockFn->setCallingConv(CallingConv::C);
    6060    doBlockFn->setDoesNotThrow();
    61     for (int i = 1; i <= doBlockParameters.size(); i++) {
    62         doBlockFn->setDoesNotCapture(i);
    63     }
     61    doBlockFn->setDoesNotCapture(1);
    6462   
    6563    FunctionType * finalBlockType = FunctionType::get(iBuilder->getVoidTy(), finalBlockParameters, false);
     
    6967    finalBlockFn->setDoesNotThrow();
    7068    finalBlockFn->setDoesNotCapture(1);
    71     // Parameter #2 is not a pointer; nocapture is irrelevant
    72     for (int i = 3; i <= finalBlockParameters.size(); i++) {
    73         finalBlockFn->setDoesNotCapture(i);
    74     }
    7569   
    7670    Function::arg_iterator doBlockArgs = doBlockFn->arg_begin();
     
    10296
    10397Value * KernelInterface::createInstance(std::vector<Value *> args) {
    104     Value * kernelInstance = iBuilder->CreateAlloca(mKernelStateType);
     98    Value * kernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
    10599    Module * m = iBuilder->getModule();
    106100    std::vector<Value *> init_args = {kernelInstance};
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5183 r5185  
    5353        }
    5454        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
    55              errs() << "buffer size = " << mStreamSetInputBuffers[i]->getBufferSize() << "\n";
    56              llvm::report_fatal_error("Kernel preparation: Buffer size too small.");
     55             errs() << " buffer size = " << mStreamSetInputBuffers[i]->getBufferSize() << "\n";
     56             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].ssName);
    5757        }
    58         mScalarInputs.push_back(ScalarBinding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].ssName + basePtrSuffix});
     58        mScalarInputs.push_back(ScalarBinding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].ssName + structPtrSuffix});
    5959        mStreamSetNameMap.emplace(mStreamSetInputs[i].ssName, streamSetNo);
    6060        streamSetNo++;
     
    6262    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    6363        if (!(mStreamSetOutputBuffers[i]->getBufferStreamSetType() == mStreamSetOutputs[i].ssType)) {
    64              llvm::report_fatal_error("Kernel preparation: Incorrect output buffer type");
     64             llvm::report_fatal_error("Kernel preparation: Incorrect output buffer type " + mStreamSetOutputs[i].ssName);
    6565        }
    66         mScalarInputs.push_back(ScalarBinding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].ssName + basePtrSuffix});
     66        mScalarInputs.push_back(ScalarBinding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].ssName + structPtrSuffix});
    6767        mStreamSetNameMap.emplace(mStreamSetOutputs[i].ssName, streamSetNo);
    6868        streamSetNo++;
     
    149149}
    150150
     151// Note: this may be overridden to incorporate doBlock logic directly into
     152// the doSegment function.
    151153void KernelBuilder::generateDoBlockLogic(Value * self, Value * blockNo) {
    152154    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
     
    176178   
    177179    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    178         Value * basePtr = getStreamSetBasePtr(self, mStreamSetInputs[i].ssName);
    179         inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(basePtr));
     180        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].ssName);
     181        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
    180182    }
    181183   
     
    198200    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
    199201    blocksToDo = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocksToDo, blocksAvail), blocksToDo, blocksAvail);
     202    //iBuilder->CallPrintInt(mKernelName + "_blocksAvail", blocksAvail);
    200203    iBuilder->CreateBr(blockLoopCond);
    201204
     
    208211    iBuilder->SetInsertPoint(blockLoopBody);
    209212    Value * blockNo = getScalarField(self, blockNoScalar);   
     213
    210214    generateDoBlockLogic(self, blockNo);
    211215    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
     
    221225#endif
    222226    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    223         Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
    224         Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(basePtr);
     227        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName);
     228        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
    225229        iBuilder->CreateAlignedStore(produced, producerPosPtr, sizeof(size_t))->setOrdering(AtomicOrdering::Release);
    226230    }
     
    332336}
    333337
    334 Value * KernelBuilder::getStreamSetBasePtr(Value * self, std::string ssName) {
    335     return getScalarField(self, ssName + basePtrSuffix);
     338Value * KernelBuilder::getStreamSetStructPtr(Value * self, std::string ssName) {
     339    return getScalarField(self, ssName + structPtrSuffix);
    336340}
    337341
    338342Value * KernelBuilder::getStreamSetBlockPtr(Value * self, std::string ssName, Value * blockNo) {
    339     Value * basePtr = getStreamSetBasePtr(self, ssName);
     343    Value * ssStructPtr = getStreamSetStructPtr(self, ssName);
    340344    unsigned ssIndex = getStreamSetIndex(ssName);
    341345    if (ssIndex < mStreamSetInputs.size()) {
    342         return mStreamSetInputBuffers[ssIndex]->getStreamSetBlockPointer(basePtr, blockNo);
     346        return mStreamSetInputBuffers[ssIndex]->getStreamSetBlockPointer(ssStructPtr, blockNo);
    343347    }
    344348    else {
    345         return mStreamSetOutputBuffers[ssIndex - mStreamSetInputs.size()]->getStreamSetBlockPointer(basePtr, blockNo);
     349        return mStreamSetOutputBuffers[ssIndex - mStreamSetInputs.size()]->getStreamSetBlockPointer(ssStructPtr, blockNo);
    346350    }
    347351}
    348352
    349353Value * KernelBuilder::createInstance(std::vector<Value *> args) {
    350     Value * kernelInstance = iBuilder->CreateAlloca(mKernelStateType);
     354    Value * kernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
    351355    Module * m = iBuilder->getModule();
    352356    std::vector<Value *> init_args = {kernelInstance};
     
    394398
    395399    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    396         Value * basePtr = getStreamSetBasePtr(self, mStreamSetInputs[i].ssName);
    397         inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(basePtr));
    398         inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(basePtr));
    399         endSignalPtrs.push_back(mStreamSetInputBuffers[i]->hasEndOfInputPtr(basePtr));
     400        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].ssName);
     401        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
     402        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(ssStructPtr));
     403        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->hasEndOfInputPtr(ssStructPtr));
    400404    }
    401405    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    402         Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
    403         outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(basePtr));
    404         outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(basePtr));
     406        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName);
     407        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr));
     408        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
    405409    }
    406410
     
    493497        iBuilder->SetInsertPoint(earlyEndBlock);
    494498        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    495             Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
    496             mStreamSetOutputBuffers[i]->setEndOfInput(basePtr);
     499            Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName);
     500            mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
    497501        }       
    498502    }
     
    528532
    529533    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    530         Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
    531         mStreamSetOutputBuffers[i]->setEndOfInput(basePtr);
     534        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName);
     535        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
    532536    }
    533537
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5174 r5185  
    2020const std::string producedItemCount = "producedItemCount";
    2121const std::string terminationSignal = "terminationSignal";
    22 const std::string basePtrSuffix = "_basePtr";
     22const std::string structPtrSuffix = "_structPtr";
    2323const std::string blkMaskSuffix = "_blkMask";
    2424
     
    109109    unsigned getStreamSetIndex(std::string ssName);
    110110   
    111     llvm::Value * getStreamSetBasePtr(Value * self, std::string ssName);
     111    llvm::Value * getStreamSetStructPtr(Value * self, std::string ssName);
    112112    size_t getStreamSetBufferSize(Value * self, std::string ssName);
    113113
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5141 r5185  
    152152}
    153153   
    154 static Function * create_write(Module * const mod) {
    155     Function * write = mod->getFunction("write");
    156     if (write == nullptr) {
    157         FunctionType *write_type =
    158         TypeBuilder<long(int, char *, long), false>::get(mod->getContext());
    159         write = cast<Function>(mod->getOrInsertFunction("write", write_type,
    160                                                         AttributeSet().addAttribute(mod->getContext(), 2U, Attribute::NoAlias)));
    161     }
    162     return write;
    163 }
    164    
    165    
    166    
     154
    167155void p2s_16Kernel_withCompressedOutput::generateDoBlockMethod() {
    168156    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     
    170158    Type * i32 = iBuilder->getIntNTy(32);
    171159    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0);
    172     Type * i8PtrTy = iBuilder->getInt8PtrTy();
    173 
    174     Function * writefn = create_write(m);
    175 
    176     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    177    
    178     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     160
     161    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     162   
     163    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     164    Constant * stride = ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride());
     165
    179166    Value * self = getParameter(doBlockFunction, "self");
    180167    Value * blockNo = getScalarField(self, blockNoScalar);
    181168    Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
    182169    Value * delCountBlock_ptr = getStreamSetBlockPtr(self, "deletionCounts", blockNo);
    183     Value * i16UnitsGenerated = getScalarField(self, "unitsGenerated");  // units generated to buffer
    184     Value * i16UnitsWritten = getScalarField(self, "unitsWritten");  // units written to stdout
    185    
    186     Value * i16StreamBase_ptr = iBuilder->CreateBitCast(getStreamSetBasePtr(self, "i16Stream"), PointerType::get(iBuilder->getInt16Ty(), 0));
    187     Value * u16_output_ptr = iBuilder->CreateGEP(i16StreamBase_ptr, iBuilder->CreateSub(i16UnitsGenerated, i16UnitsWritten));
     170    Value * i16UnitsGenerated = getProducedItemCount(self); // units generated to buffer
     171    Value * i16BlockNo = iBuilder->CreateUDiv(i16UnitsGenerated, stride);
     172   
     173    Value * i16StreamBase_ptr = iBuilder->CreateBitCast(getStreamSetBlockPtr(self, "i16Stream", i16BlockNo), PointerType::get(iBuilder->getInt16Ty(), 0));
     174   
     175    Value * u16_output_ptr = iBuilder->CreateGEP(i16StreamBase_ptr, iBuilder->CreateURem(i16UnitsGenerated, stride));
    188176
    189177   
     
    222210   
    223211    i16UnitsGenerated = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
    224     setScalarField(self, "unitsGenerated", i16UnitsGenerated);
    225    
    226     Value * unitsInBuffer = iBuilder->CreateSub(i16UnitsGenerated, i16UnitsWritten);
    227     Value * lessThanABlockRemaining = iBuilder->CreateICmpUGT(unitsInBuffer, ConstantInt::get(iBuilder->getSizeTy(), (getStreamSetBufferSize(self, "i16Stream") - 1) * iBuilder->getBitBlockWidth()));
    228     BasicBlock * flushStmts = BasicBlock::Create(iBuilder->getContext(), "flush", doBlockFunction, 0);
    229     BasicBlock * exitStmts = BasicBlock::Create(iBuilder->getContext(), "exit", doBlockFunction, 0);
    230     iBuilder->CreateCondBr(lessThanABlockRemaining, flushStmts, exitStmts);
    231    
    232     iBuilder->SetInsertPoint(flushStmts);
    233     iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), iBuilder->CreateBitCast(i16StreamBase_ptr, i8PtrTy), iBuilder->CreateAdd(unitsInBuffer, unitsInBuffer)}));
    234     setScalarField(self, "unitsWritten", i16UnitsGenerated); // Everything generated has now been written.
    235     iBuilder->CreateBr(exitStmts);
    236    
    237     iBuilder->SetInsertPoint(exitStmts);
     212    setProducedItemCount(self, i16UnitsGenerated);
    238213    iBuilder->CreateRetVoid();
    239214    iBuilder->restoreIP(savePoint);
     
    243218    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
    244219    Module * m = iBuilder->getModule();
    245     Type * i8PtrTy = iBuilder->getInt8PtrTy();
    246     Function * writefn = create_write(m);
    247220    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    248221    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
     
    256229        doBlockArgs.push_back(&*args++);
    257230    }
     231    Value * i16UnitsGenerated = getProducedItemCount(self); // units generated to buffer
     232
    258233    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
    259     Value * i16UnitsGenerated = getScalarField(self, "unitsGenerated");  // units generated to buffer
    260     Value * i16UnitsWritten = getScalarField(self, "unitsWritten");  // units written to stdout
    261     Value * unitsInBuffer = iBuilder->CreateSub(i16UnitsGenerated, i16UnitsWritten);
    262     Value * mustFlush = iBuilder->CreateICmpUGT(unitsInBuffer, ConstantInt::get(iBuilder->getSizeTy(), 0));
    263    
    264     BasicBlock * flushStmts = BasicBlock::Create(iBuilder->getContext(), "flush", finalBlockFunction, 0);
    265     BasicBlock * exitStmts = BasicBlock::Create(iBuilder->getContext(), "exit", finalBlockFunction, 0);
    266     iBuilder->CreateCondBr(mustFlush, flushStmts, exitStmts);
    267    
    268     iBuilder->SetInsertPoint(flushStmts);
    269     Value * i16StreamBase_ptr = iBuilder->CreateBitCast(getStreamSetBasePtr(self, "i16Stream"), PointerType::get(iBuilder->getInt16Ty(), 0));
    270     iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), iBuilder->CreateBitCast(i16StreamBase_ptr, i8PtrTy), iBuilder->CreateAdd(unitsInBuffer, unitsInBuffer)}));
    271     setScalarField(self, "unitsWritten", i16UnitsGenerated); // Everything generated has now been written.
    272     iBuilder->CreateBr(exitStmts);
    273    
    274     iBuilder->SetInsertPoint(exitStmts);
    275     iBuilder->CreateRetVoid();
    276     iBuilder->restoreIP(savePoint);
    277 }
    278    
    279    
    280 }
     234    i16UnitsGenerated = getProducedItemCount(self); // units generated to buffer
     235    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     236        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName);
     237        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
     238        iBuilder->CreateAlignedStore(i16UnitsGenerated, producerPosPtr, sizeof(size_t))->setOrdering(AtomicOrdering::Release);
     239    }
     240    iBuilder->CreateRetVoid();
     241    iBuilder->restoreIP(savePoint);
     242}
     243   
     244   
     245}
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5106 r5185  
    2121}
    2222
    23 //  Override the default void type for DoBlock functions.
    24 void stdOutKernel::prepareKernel() {
    25     setDoBlockReturnType(mStreamType);
    26     KernelBuilder::prepareKernel();
    27 }
    28 
    29 
     23// The doBlock method is deprecated.   But in case it is used, just call doSegment with
     24// 1 as the number of blocks to do.
    3025void stdOutKernel::generateDoBlockMethod() {
    3126    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
    3227    Module * m = iBuilder->getModule();
     28    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     29    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
     30    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     31    Value * self = getParameter(doBlockFunction, "self");
     32    iBuilder->CreateCall(doSegmentFunction, {self, ConstantInt::get(iBuilder->getSizeTy(), 1)});
     33    iBuilder->CreateRetVoid();
     34    iBuilder->restoreIP(savePoint);
     35}
     36           
     37void stdOutKernel::generateDoSegmentMethod() {
     38    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     39    Module * m = iBuilder->getModule();
    3340    Function * writefn = create_write(m);
    34     Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     41    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
    3542    Type * i8PtrTy = iBuilder->getInt8PtrTy();
     43   
     44    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
     45    Constant * stride = ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride());
     46    Constant * strideBytes = ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride() * mCodeUnitWidth/8);
     47   
     48    Function::arg_iterator args = doSegmentFunction->arg_begin();
     49    Value * self = &*(args++);
     50    Value * blocksToDo = &*(args);
     51    ////iBuilder->CallPrintInt("blocksToDo", blocksToDo);
     52    Value * segmentNo = getLogicalSegmentNo(self);
     53    Value * streamStructPtr = getStreamSetStructPtr(self, "codeUnitBuffer");
     54    //iBuilder->CallPrintInt("streamStructPtr", iBuilder->CreatePtrToInt(streamStructPtr, iBuilder->getInt64Ty()));
    3655
    37     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     56    LoadInst * producerPos = iBuilder->CreateAlignedLoad(mStreamSetInputBuffers[0]->getProducerPosPtr(streamStructPtr), sizeof(size_t));
     57    producerPos->setOrdering(AtomicOrdering::Acquire);
     58    //iBuilder->CallPrintInt("producerPos", producerPos);
    3859
    39     Value * self = getParameter(doBlockFunction, "self");
     60    Value * processed = getProcessedItemCount(self);
     61    Value * itemsAvail = iBuilder->CreateSub(producerPos, processed);
     62    //iBuilder->CallPrintInt("previously processed", processed);
     63    Value * blocksAvail = iBuilder->CreateUDiv(itemsAvail, stride);
     64    //iBuilder->CallPrintInt("blocksAvail", blocksAvail);
     65    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
     66    blocksToDo = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocksToDo, blocksAvail), blocksToDo, blocksAvail);
    4067    Value * blockNo = getScalarField(self, blockNoScalar);
    41     Value * inputStreamBlock = getStreamSetBlockPtr(self, "inputStreamSet", blockNo);
    42 
    43     Value * bufferPtr = getScalarField(self, "bufferPtr");
    44     Value * bufferFinalBlockPtr = getScalarField(self, "bufferFinalBlockPtr");
    45     //iBuilder->CallPrintInt("bufferPtr", iBuilder->CreatePtrToInt(bufferPtr, iBuilder->getInt64Ty()));
    46     //iBuilder->CallPrintInt("bufferFinalBlockPtr", iBuilder->CreatePtrToInt(bufferFinalBlockPtr, iBuilder->getInt64Ty()));
     68    //iBuilder->CallPrintInt("blockNo", blockNo);
     69    Value * basePtr = getStreamSetBlockPtr(self, "codeUnitBuffer", blockNo);
     70    //iBuilder->CallPrintInt("basePtr", iBuilder->CreatePtrToInt(basePtr, iBuilder->getInt64Ty()));
     71    Value * bytesToDo = iBuilder->CreateMul(blocksToDo, strideBytes);
     72    //iBuilder->CallPrintInt("bytesToDo", bytesToDo);
     73    iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), iBuilder->CreateBitCast(basePtr, i8PtrTy), bytesToDo}));
    4774   
     75    setScalarField(self, blockNoScalar, iBuilder->CreateAdd(blockNo, blocksToDo));
     76    processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(blocksToDo, stride));
     77    setProcessedItemCount(self, processed);
     78    mStreamSetInputBuffers[0]->setConsumerPos(streamStructPtr, processed);
     79    // Must be the last action, for synchronization.
     80    setLogicalSegmentNo(self, iBuilder->CreateAdd(segmentNo, ConstantInt::get(iBuilder->getSizeTy(), 1)));
    4881   
    49     BasicBlock * flushBlock = BasicBlock::Create(iBuilder->getContext(), "flush", doBlockFunction, 0);
    50     BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exit", doBlockFunction, 0);
    51     Value * inFinal = iBuilder->CreateICmpUGT(bufferPtr, bufferFinalBlockPtr);
    52     iBuilder->CreateCondBr(inFinal, flushBlock, exitBlock);
    53    
    54     iBuilder->SetInsertPoint(flushBlock);
    55     Value * basePtr = getScalarField(self, "bufferBasePtr");
    56     //iBuilder->CallPrintInt("bufferBasePtr", iBuilder->CreatePtrToInt(basePtr, iBuilder->getInt64Ty()));
    57     Value * baseAddress = iBuilder->CreatePtrToInt(basePtr, iBuilder->getInt64Ty());
    58     Value * pointerAddress = iBuilder->CreatePtrToInt(bufferPtr, iBuilder->getInt64Ty());
    59     Value * bytesToFlush = iBuilder->CreateSub(pointerAddress, baseAddress);
    60    
    61     iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), iBuilder->CreateBitCast(basePtr, i8PtrTy), bytesToFlush}));
    62     // Buffer is flushed, return the buffer base pointer for subsequent output to the buffer.
    63     iBuilder->CreateRet(basePtr);
    64 
    65     iBuilder->SetInsertPoint(exitBlock);
    66     iBuilder->CreateRet(bufferPtr);
     82    iBuilder->CreateRetVoid();
    6783    iBuilder->restoreIP(savePoint);
    6884}
     
    7793    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_flush", finalBlockFunction, 0));
    7894    Value * self = getParameter(finalBlockFunction, "self");
    79     Value * bufferPtr = getParameter(finalBlockFunction, "bufferPtr");
    80     Value * basePtr = getScalarField(self, "bufferBasePtr");
    81     // Flush the output.
    82     Value * baseAddress = iBuilder->CreatePtrToInt(basePtr, iBuilder->getInt64Ty());
    83     Value * pointerAddress = iBuilder->CreatePtrToInt(bufferPtr, iBuilder->getInt64Ty());
    84     Value * bytesToFlush = iBuilder->CreateSub(pointerAddress, baseAddress);
     95    Value * streamStructPtr = getStreamSetStructPtr(self, "codeUnitBuffer");
     96    LoadInst * producerPos = iBuilder->CreateAlignedLoad(mStreamSetInputBuffers[0]->getProducerPosPtr(streamStructPtr), sizeof(size_t));
     97    producerPos->setOrdering(AtomicOrdering::Acquire);
     98    Value * processed = getProcessedItemCount(self);
     99    Value * itemsAvail = iBuilder->CreateSub(producerPos, processed);
     100    Value * blockNo = getScalarField(self, blockNoScalar);
     101    Value * basePtr = getStreamSetBlockPtr(self, "codeUnitBuffer", blockNo);
     102    Value * bytesToDo = iBuilder->CreateMul(itemsAvail, ConstantInt::get(iBuilder->getSizeTy(), mCodeUnitWidth/8));
     103
     104    iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), iBuilder->CreateBitCast(basePtr, i8PtrTy), bytesToDo}));
    85105   
    86     iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), iBuilder->CreateBitCast(basePtr, i8PtrTy), bytesToFlush}));
    87     // Buffer is flushed, return the buffer base pointer for subsequent output to the buffer.
    88     iBuilder->CreateRet(basePtr);
     106    setProcessedItemCount(self, producerPos);
     107    mStreamSetInputBuffers[0]->setConsumerPos(streamStructPtr, producerPos);
     108    iBuilder->CreateRetVoid();
    89109    iBuilder->restoreIP(savePoint);
    90110}
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.h

    r5133 r5185  
    1818    stdOutKernel(IDISA::IDISA_Builder * iBuilder, unsigned codeUnitWidth) :
    1919    KernelBuilder(iBuilder, "stdout",
    20                   {StreamSetBinding{parabix::StreamSetType(1, codeUnitWidth), "codeUnitBuffer"}}, {}, {}, {}, {}) {
    21         mStreamType = PointerType::get(parabix::StreamSetType(1, codeUnitWidth).getStreamSetBlockType(iBuilder), 0);
    22         mScalarInputs = {ScalarBinding{mStreamType , "bufferPtr"}};
    23     }
     20                  {StreamSetBinding{parabix::StreamSetType(1, codeUnitWidth), "codeUnitBuffer"}}, {}, {}, {}, {}),
     21    mCodeUnitWidth(codeUnitWidth) {}
    2422   
    2523private:
    26     void prepareKernel() override;
     24    unsigned mCodeUnitWidth;
     25 
    2726    void generateDoBlockMethod() override;
    2827    void generateFinalBlockMethod() override;
    2928    void generateDoSegmentMethod() override;
    3029   
    31     llvm::Type * mStreamType;
    3230};
    3331}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5184 r5185  
    1515enum SS_struct_index {iProducer_pos = 0, iConsumer_pos = 1, iEnd_of_input = 2, iBuffer_ptr = 3};
    1616
     17llvm::Value * parabix::getProducerPosPtr(IDISA::IDISA_Builder * b, Value * bufferStructPtr) {
     18    return b->CreateGEP(bufferStructPtr, {b->getInt32(0), b->getInt32(iProducer_pos)});
     19}
     20
     21llvm::Value * parabix::getConsumerPosPtr(IDISA::IDISA_Builder * b, Value * bufferStructPtr) {
     22    return b->CreateGEP(bufferStructPtr, {b->getInt32(0), b->getInt32(iConsumer_pos)});
     23}
     24
     25llvm::Value * parabix::hasEndOfInputPtr(IDISA::IDISA_Builder * b, Value * bufferStructPtr) {
     26    return b->CreateGEP(bufferStructPtr, {b->getInt32(0), b->getInt32(iEnd_of_input)});
     27}
     28
     29llvm::Value * parabix::getStreamSetBufferPtr(IDISA::IDISA_Builder * b, Value * bufferStructPtr) {
     30    return b->CreateLoad(b->CreateGEP(bufferStructPtr, {b->getInt32(0), b->getInt32(iBuffer_ptr)}));
     31}
     32
    1733llvm::Type * StreamSetType::getStreamSetBlockType(IDISA::IDISA_Builder * iBuilder) {
    1834    llvm::Type * streamType = mFieldWidth == 1 ? iBuilder->getBitBlockType() : ArrayType::get(iBuilder->getBitBlockType(), mFieldWidth);
     
    2844}
    2945
    30 llvm::Value * StreamSetBuffer::getProducerPosPtr(Value * ptr) {
    31     return iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)});
     46llvm::Value * StreamSetBuffer::getProducerPosPtr(Value * bufferStructPtr) {
     47    return iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)});
    3248}
    3349
    34 void StreamSetBuffer::setProducerPos(Value * ptr, llvm::Value * pos){
    35     iBuilder->CreateStore(pos, iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
     50void StreamSetBuffer::setProducerPos(Value * bufferStructPtr, llvm::Value * pos){
     51    iBuilder->CreateStore(pos, iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
    3652}
    3753
    38 llvm::Value * StreamSetBuffer::getConsumerPosPtr(Value * ptr) {
    39     return iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)});
     54llvm::Value * StreamSetBuffer::getConsumerPosPtr(Value * bufferStructPtr) {
     55    return iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)});
    4056}
    4157
    42 void StreamSetBuffer::setConsumerPos(Value * ptr, Value * pos){
    43     iBuilder->CreateStore(pos, iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
     58void StreamSetBuffer::setConsumerPos(Value * bufferStructPtr, Value * pos){
     59    iBuilder->CreateStore(pos, iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
    4460}
    4561
    46 llvm::Value * StreamSetBuffer::hasEndOfInputPtr(Value * ptr) {
    47     return iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)});
     62llvm::Value * StreamSetBuffer::hasEndOfInputPtr(Value * bufferStructPtr) {
     63    return iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)});
    4864}
    4965
    50 void StreamSetBuffer::setEndOfInput(Value * ptr){
    51     iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), 1), iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));
     66void StreamSetBuffer::setEndOfInput(Value * bufferStructPtr){
     67    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), 1), iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));
    5268}
     69
    5370
    5471llvm::Value * StreamSetBuffer::getStreamSetStructPtr(){
    5572    return mStreamSetStructPtr;
    5673}
    57 // Single Block Buffer
    5874
    59 size_t SingleBlockBuffer::getBufferSize() {
    60     return 1; //iBuilder->getBitBlockWidth();
    61 }
    62 
    63 llvm::Value * SingleBlockBuffer::allocateBuffer() {
     75llvm::Value * StreamSetBuffer::allocateBuffer() {
    6476    Type * const size_ty = iBuilder->getSizeTy();
    6577    Type * const int8ty = iBuilder->getInt8Ty();
    66     mStreamSetBufferPtr = iBuilder->CreateAlloca(mStreamSetType.getStreamSetBlockType(iBuilder));
    67     mStreamSetStructPtr = iBuilder->CreateAlloca(mStreamSetStructType);
     78    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(mStreamSetType.getStreamSetBlockType(iBuilder), ConstantInt::get(iBuilder->getSizeTy(), mBufferBlocks));
     79    mStreamSetStructPtr = iBuilder->CreateCacheAlignedAlloca(mStreamSetStructType);
     80    //iBuilder->CallPrintInt("mStreamSetBufferPtr", iBuilder->CreatePtrToInt(mStreamSetBufferPtr, iBuilder->getInt64Ty()));
     81    //iBuilder->CallPrintInt("mStreamSetStructPtr", iBuilder->CreatePtrToInt(mStreamSetStructPtr, iBuilder->getInt64Ty()));
    6882    iBuilder->CreateStore(ConstantInt::get(size_ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
    6983    iBuilder->CreateStore(ConstantInt::get(size_ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
    7084    iBuilder->CreateStore(ConstantInt::get(int8ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));
    7185    iBuilder->CreateStore(mStreamSetBufferPtr, iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)}));
     86   
    7287    return mStreamSetBufferPtr;
    7388}
    7489
     90// Single Block Buffer
    7591// For a single block buffer, the block pointer is always the buffer base pointer.
    76 llvm::Value * SingleBlockBuffer::getStreamSetBlockPointer(llvm::Value * basePtr, llvm::Value * blockNo) {
    77     Value * handle = iBuilder->CreateGEP(basePtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
     92llvm::Value * SingleBlockBuffer::getStreamSetBlockPointer(llvm::Value * bufferStructPtr, llvm::Value * blockNo) {
     93    Value * handle = iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
    7894    return iBuilder->CreateLoad(handle);
    7995}
     
    8197
    8298// External Unbounded Buffer
    83 
    84 size_t ExternalFileBuffer::getBufferSize() {
    85     return 0;
    86 }
    8799
    88100void ExternalFileBuffer::setStreamSetBuffer(llvm::Value * ptr, Value * fileSize) {
     
    94106    mStreamSetBufferPtr = iBuilder->CreatePointerBitCastOrAddrSpaceCast(ptr, t);
    95107
    96     mStreamSetStructPtr = iBuilder->CreateAlloca(mStreamSetStructType);
     108    mStreamSetStructPtr = iBuilder->CreateCacheAlignedAlloca(mStreamSetStructType);
    97109    iBuilder->CreateStore(fileSize, iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
    98110    iBuilder->CreateStore(ConstantInt::get(size_ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
     
    105117}
    106118
    107 llvm::Value * ExternalFileBuffer::getStreamSetBlockPointer(llvm::Value * basePtr, llvm::Value * blockNo) {
    108     Value * handle = iBuilder->CreateGEP(basePtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
     119llvm::Value * ExternalFileBuffer::getStreamSetBlockPointer(llvm::Value * bufferStructPtr, llvm::Value * blockNo) {
     120    Value * handle = iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
    109121    return iBuilder->CreateGEP(iBuilder->CreateLoad(handle), {blockNo});
    110122}
     
    113125// Circular Stack Allocated Buffer
    114126
    115 size_t CircularBuffer::getBufferSize() {
    116     return mBufferBlocks; // * iBuilder->getBitBlockWidth();
     127llvm::Value * CircularBuffer::getStreamSetBlockPointer(llvm::Value * bufferStructPtr, llvm::Value * blockNo) {
     128    Value * handle = iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
     129    // Circular access is based on blockNo mod mBufferBlocks.  For power of 2 buffer sizes (required), we
     130    // use bitwise masking to efficiently compute the mod function  (blockNo & (mBufferBlocks - 1)
     131    Value * bufPtr = iBuilder->CreateLoad(handle);
     132    //iBuilder->CallPrintInt("CircularBuffer bufPtr", iBuilder->CreatePtrToInt(bufPtr, iBuilder->getSizeTy()));
     133    return iBuilder->CreateGEP(bufPtr, {iBuilder->CreateAnd(blockNo, ConstantInt::get(iBuilder->getSizeTy(), mBufferBlocks-1))});
    117134}
    118135
    119 llvm::Value * CircularBuffer::allocateBuffer() {
    120     Type * const size_ty = iBuilder->getSizeTy();
    121     Type * const int8ty = iBuilder->getInt8Ty();
    122     mStreamSetBufferPtr = iBuilder->CreateAlloca(mStreamSetType.getStreamSetBlockType(iBuilder), ConstantInt::get(iBuilder->getSizeTy(), mBufferBlocks));
    123     mStreamSetStructPtr = iBuilder->CreateAlloca(mStreamSetStructType);
    124     iBuilder->CreateStore(ConstantInt::get(size_ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
    125     iBuilder->CreateStore(ConstantInt::get(size_ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)}));
    126     iBuilder->CreateStore(ConstantInt::get(int8ty, 0), iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iEnd_of_input)}));
    127     iBuilder->CreateStore(mStreamSetBufferPtr, iBuilder->CreateGEP(mStreamSetStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)}));
    128 
    129     return mStreamSetBufferPtr;
     136llvm::Value * LinearBuffer::getStreamSetBlockPointer(llvm::Value * bufferStructPtr, llvm::Value * blockNo) {
     137    Constant * blockWidth = ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride());
     138    Value * consumerPos_ptr = iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)});
     139    Value * consumerPos = iBuilder->CreateLoad(consumerPos_ptr);
     140    Value * consumerBlock = iBuilder->CreateUDiv(consumerPos, blockWidth);
     141    Value * handle = iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
     142    Value * bufPtr = iBuilder->CreateLoad(handle);
     143    //iBuilder->CallPrintInt("LinearBuffer bufPtr", iBuilder->CreatePtrToInt(bufPtr, iBuilder->getSizeTy()));
     144    return iBuilder->CreateGEP(bufPtr, {iBuilder->CreateSub(blockNo, consumerBlock)});
    130145}
    131146
    132 llvm::Value * CircularBuffer::getStreamSetBlockPointer(llvm::Value * basePtr, llvm::Value * blockNo) {
    133     Value * handle = iBuilder->CreateGEP(basePtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
    134     return iBuilder->CreateGEP(iBuilder->CreateLoad(handle), {iBuilder->CreateAnd(blockNo, ConstantInt::get(iBuilder->getSizeTy(), mBufferBlocks-1))});
    135 }
     147void LinearBuffer::setConsumerPos(Value * bufferStructPtr, Value * new_consumer_pos) {
     148    Type * const i1 = iBuilder->getInt1Ty();
     149    Type * const i8 = iBuilder->getInt8Ty();
     150    Type * const i32 = iBuilder->getInt32Ty();
     151    Type * const i8_ptr = PointerType::get(i8, mAddrSpace);
     152    Module * M = iBuilder->getModule();
     153    Function * memcpyFunc = cast<Function>(M->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i" + std::to_string(sizeof(size_t) * 8),
     154                                                                  iBuilder->getVoidTy(), i8_ptr, i8_ptr, iBuilder->getSizeTy(), i32, i1, nullptr));
     155    Function * current = iBuilder->GetInsertBlock()->getParent();
     156    BasicBlock * copyBackBody = BasicBlock::Create(M->getContext(), "copy_back", current, 0);
     157    BasicBlock * setConsumerPosExit = BasicBlock::Create(M->getContext(), "setConsumerPos_done", current, 0);
     158    Constant * blockWidth = ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride());
     159    Constant * one = ConstantInt::get(iBuilder->getSizeTy(), 1);
     160    Value * consumerPos_ptr = iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iConsumer_pos)});
     161    Value * consumerPos = iBuilder->CreateLoad(consumerPos_ptr);
     162    Value * consumerBlock = iBuilder->CreateUDiv(consumerPos, blockWidth);
     163    // Ensure that the new consumer position is no less than the current position.
     164    new_consumer_pos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(new_consumer_pos, consumerPos), consumerPos, new_consumer_pos);
     165    Value * producerPos = iBuilder->CreateLoad(iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iProducer_pos)}));
     166    // Ensure that the new consumer position is no greater than the current producer position.
     167    Value * new_pos_lt_producer_pos = iBuilder->CreateICmpULT(new_consumer_pos, producerPos);
     168    new_consumer_pos = iBuilder->CreateSelect(new_pos_lt_producer_pos, new_consumer_pos, producerPos);
     169    // Now, the new_consumer_pos is at most = to the producer_pos; if =, we're done.
     170    iBuilder->CreateCondBr(new_pos_lt_producer_pos, copyBackBody, setConsumerPosExit);
     171    iBuilder->SetInsertPoint(copyBackBody);
     172   
     173    Value * new_consumer_block = iBuilder->CreateUDiv(new_consumer_pos, blockWidth);
     174   
     175    Value * lastProducerBlock = iBuilder->CreateUDiv(iBuilder->CreateSub(producerPos, one), blockWidth);
     176    //iBuilder->CallPrintInt("new_consumer_block", new_consumer_block);
     177    //iBuilder->CallPrintInt("lastProducerBlock", lastProducerBlock);
    136178
     179    Value * copyBlocks = iBuilder->CreateAdd(iBuilder->CreateSub(lastProducerBlock, new_consumer_block), one);
     180    Constant * blockBytes = ConstantInt::get(iBuilder->getSizeTy(), mStreamSetType.StreamCount() * mStreamSetType.StreamFieldWidth() * iBuilder->getStride()/8);
     181    Value * copyLength = iBuilder->CreateMul(copyBlocks, blockBytes);
     182    //iBuilder->CallPrintInt("memcpy copyLength", copyLength);
     183    // Must copy back one full block for each of the streams in the stream set.
     184    Value * handle = iBuilder->CreateGEP(bufferStructPtr, {iBuilder->getInt32(0), iBuilder->getInt32(iBuffer_ptr)});
     185    Value * bufferPtr = iBuilder->CreateLoad(handle);
     186    //iBuilder->CallPrintInt("memcpy bufferPtr", iBuilder->CreatePtrToInt(bufferPtr, iBuilder->getSizeTy()));
     187
     188    Value * copyFrom = iBuilder->CreateGEP(bufferPtr, {iBuilder->CreateSub(new_consumer_block, consumerBlock)});
     189    //iBuilder->CallPrintInt("memcpy copyFrom", iBuilder->CreatePtrToInt(copyFrom, iBuilder->getSizeTy()));
     190    Value * alignment = ConstantInt::get(iBuilder->getInt32Ty(), iBuilder->getBitBlockWidth()/8);
     191   
     192    iBuilder->CreateCall(memcpyFunc, {iBuilder->CreateBitCast(bufferPtr, i8_ptr), iBuilder->CreateBitCast(copyFrom, i8_ptr), copyLength, alignment, ConstantInt::getNullValue(i1)});
     193    iBuilder->CreateBr(setConsumerPosExit);
     194    // Copy back done, store the new consumer position.
     195    iBuilder->SetInsertPoint(setConsumerPosExit);
     196    iBuilder->CreateStore(new_consumer_pos, consumerPos_ptr);
     197}   
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5184 r5185  
    3030};
    3131
     32   
     33// Stream Set Structs hold information about the current state
     34// of a stream set buffer.
     35   
     36llvm::Value * getProducerPosPtr(IDISA::IDISA_Builder * b, Value * bufferStructPtr);   
     37llvm::Value * getConsumerPosPtr(IDISA::IDISA_Builder * b, Value * bufferStructPtr);
     38llvm::Value * hasEndOfInputPtr(IDISA::IDISA_Builder * b, Value * bufferStructPtr);   
     39llvm::Value * getStreamSetBufferPtr(IDISA::IDISA_Builder * b, Value * bufferStructPtr);   
     40   
    3241class StreamSetBuffer {
    3342public:
    34     enum class BufferKind : unsigned {BlockBuffer, ExternalFileBuffer, CircularBuffer, ExpandingBuffer};
     43    enum class BufferKind : unsigned {BlockBuffer, ExternalFileBuffer, CircularBuffer, LinearBuffer, ExpandingBuffer};
    3544    inline BufferKind getBufferKind() const {return mBufferKind;}
    3645    inline StreamSetType& getBufferStreamSetType() {return mStreamSetType;}
     
    3847    llvm::PointerType * getStreamBufferPointerType();
    3948
    40     virtual size_t getBufferSize() = 0;
     49    size_t getBufferSize() { return mBufferBlocks;}
    4150   
    42     virtual llvm::Value * allocateBuffer() = 0;
     51    virtual llvm::Value * allocateBuffer();
    4352   
    4453    llvm::Value * getStreamSetBasePtr() {return mStreamSetBufferPtr;}
    4554   
    4655    // Get the buffer pointer for a given block of the stream.
    47     virtual llvm::Value * getStreamSetBlockPointer(llvm::Value * bufferBasePtr, llvm::Value * blockNo) = 0;
     56    virtual llvm::Value * getStreamSetBlockPointer(llvm::Value * bufferStructPtr, llvm::Value * blockNo) = 0;
    4857   
    49     virtual llvm::Value * getProducerPosPtr(Value * ptr);
     58    llvm::Value * getProducerPosPtr(Value * bufferStructPtr);
    5059
    51     virtual void setProducerPos(Value * ptr, Value * pos);
     60    void setProducerPos(Value * bufferStructPtr, Value * pos);
    5261
    53     virtual llvm::Value * getConsumerPosPtr(Value * ptr);
     62    llvm::Value * getConsumerPosPtr(Value * bufferStructPtr);
    5463
    55     virtual void setConsumerPos(Value * ptr, Value * pos);
     64    virtual void setConsumerPos(Value * bufferStructPtr, Value * pos);
    5665
    57     virtual llvm::Value * hasEndOfInputPtr(Value * ptr);
     66    llvm::Value * hasEndOfInputPtr(Value * bufferStructPtr);
    5867
    59     virtual void setEndOfInput(Value * ptr);
     68    void setEndOfInput(Value * bufferStructPtr);
     69   
     70    llvm::Value * getStreamSetBufferPtrPtr(Value * bufferStructPtr);
    6071
    6172    virtual llvm::PointerType * getStreamSetStructPointerType();
     
    6475   
    6576protected:
    66     StreamSetBuffer(BufferKind k, IDISA::IDISA_Builder * b, StreamSetType ss_type, int AddressSpace = 0) :
    67         mBufferKind(k), iBuilder(b), mStreamSetType(ss_type), mBufferBlocks(1), mAddrSpace(AddressSpace), mStreamSetBufferPtr(nullptr) {
     77    StreamSetBuffer(BufferKind k, IDISA::IDISA_Builder * b, StreamSetType ss_type, unsigned blocks, unsigned AddressSpace = 0) :
     78        mBufferKind(k), iBuilder(b), mStreamSetType(ss_type), mBufferBlocks(blocks), mAddrSpace(AddressSpace), mStreamSetBufferPtr(nullptr) {
    6879            mStreamSetStructType =
    6980                StructType::get(iBuilder->getContext(),
     
    7889    StreamSetType mStreamSetType;
    7990    size_t mBufferBlocks;
    80     int mAddrSpace;
     91    unsigned mAddrSpace;
    8192    llvm::Value * mStreamSetBufferPtr;
    8293    llvm::Value * mStreamSetStructPtr;
     
    90101   
    91102    SingleBlockBuffer(IDISA::IDISA_Builder * b, StreamSetType ss_type) :
    92     StreamSetBuffer(BufferKind::BlockBuffer, b, ss_type, 0) { }
    93    
    94     size_t getBufferSize() override;
    95     llvm::Value * allocateBuffer() override;
    96     llvm::Value * getStreamSetBlockPointer(llvm::Value * bufferBasePtr, llvm::Value * blockNo) override;
     103    StreamSetBuffer(BufferKind::BlockBuffer, b, ss_type, 1, 0) {}
     104    llvm::Value * getStreamSetBlockPointer(llvm::Value * bufferStructPtr, llvm::Value * blockNo) override;
    97105};
    98106   
     
    101109    static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::ExternalFileBuffer;}
    102110   
    103     ExternalFileBuffer(IDISA::IDISA_Builder * b, StreamSetType ss_type, int AddressSpace = 0) :
    104         StreamSetBuffer(BufferKind::ExternalFileBuffer, b, ss_type, AddressSpace) {}
     111    ExternalFileBuffer(IDISA::IDISA_Builder * b, StreamSetType ss_type, unsigned AddressSpace = 0) :
     112        StreamSetBuffer(BufferKind::ExternalFileBuffer, b, ss_type, 0, AddressSpace) {}
    105113
    106114    void setStreamSetBuffer(llvm::Value * ptr, llvm::Value * fileSize);
    107115   
    108     size_t getBufferSize() override;
    109116    // Can't allocate - raise an error.
    110117    llvm::Value * allocateBuffer() override;
    111     llvm::Value * getStreamSetBlockPointer(llvm::Value * bufferBasePtr, llvm::Value * blockNo) override;
     118    llvm::Value * getStreamSetBlockPointer(llvm::Value * bufferStructPtr, llvm::Value * blockNo) override;
    112119
    113120};
     
    117124    static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::CircularBuffer;}
    118125 
    119     CircularBuffer(IDISA::IDISA_Builder * b, StreamSetType ss_type, size_t bufferBlocks) :
    120         StreamSetBuffer(BufferKind::CircularBuffer, b, ss_type) {
    121             mBufferBlocks = bufferBlocks;
     126    CircularBuffer(IDISA::IDISA_Builder * b, StreamSetType ss_type, size_t bufferBlocks, unsigned AddressSpace = 0) :
     127        StreamSetBuffer(BufferKind::CircularBuffer, b, ss_type, bufferBlocks, AddressSpace) {
    122128            if (((bufferBlocks - 1) & bufferBlocks) != 0) {
    123129                throw std::runtime_error("CircularStreamSetBuffer: number of blocks must be a power of 2!");
    124130            }
    125131        }
    126 
    127     size_t getBufferSize() override;
    128     llvm::Value * allocateBuffer() override;
    129     llvm::Value * getStreamSetBlockPointer(llvm::Value * bufferBasePtr, llvm::Value * blockNo) override;
     132    llvm::Value * getStreamSetBlockPointer(llvm::Value * bufferStructPtr, llvm::Value * blockNo) override;
    130133};
     134   
     135// Linear buffers extending from the current ConsumerPos forward.   Within the buffer, the
     136// offset of the block containing the current consumer position is always zero.
     137//
     138class LinearBuffer : public StreamSetBuffer {
     139public:
     140    static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::LinearBuffer;}
     141   
     142    LinearBuffer(IDISA::IDISA_Builder * b, StreamSetType ss_type, size_t bufferBlocks, unsigned AddressSpace = 0) :
     143        StreamSetBuffer(BufferKind::CircularBuffer, b, ss_type, bufferBlocks, AddressSpace) {}
     144   
     145    llvm::Value * getStreamSetBlockPointer(llvm::Value * bufferStructPtr, llvm::Value * blockNo) override;
     146   
     147    // Reset the buffer to contain data starting at the base block of new_consumer_pos,
     148    // copying back any data beyond that position.
     149    void setConsumerPos(Value * bufferStructPtr, Value * new_consumer_pos) override;
     150};
     151   
     152   
    131153
    132154}
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5176 r5185  
    4848static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore, cl::cat(u8u16Options));
    4949
    50 static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(u8u16Options));
     50static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(u8u16Options));
    5151
    5252//
     
    242242   
    243243    //SingleBlockBuffer DeletionCounts(iBuilder, StreamSetType(1, i1));
    244     CircularBuffer DeletionCounts(iBuilder, StreamSetType(1, i1), segmentSize * bufferSegments);
    245    
    246     CircularBuffer U16out(iBuilder, StreamSetType(1, i16), u16OutputBlocks);
     244    CircularBuffer DeletionCounts(iBuilder, StreamSetType(1, i1), segmentSize * bufferSegments );
     245   
     246    LinearBuffer U16out(iBuilder, StreamSetType(1, i16), segmentSize * bufferSegments + 2);
    247247
    248248    s2pKernel  s2pk(iBuilder);
     
    258258    p2s_16Kernel_withCompressedOutput p2sk(iBuilder);
    259259    p2sk.generateKernel({&U16Bits, &DeletionCounts}, {&U16out});
     260   
     261    stdOutKernel stdoutK(iBuilder, 16);
     262    stdoutK.generateKernel({&U16out}, {});
     263
    260264   
    261265    Type * const size_ty = iBuilder->getSizeTy();
     
    290294    Value * delInstance = delK.createInstance({});
    291295    Value * p2sInstance = p2sk.createInstance({});
     296    Value * stdoutInstance = stdoutK.createInstance({});
    292297   
    293298    Type * pthreadTy = size_ty;
     
    313318    pthreadExitFunc->setCallingConv(llvm::CallingConv::C);
    314319
    315     if (pipelineParallel){
    316         generatePipelineParallel(iBuilder, {&s2pk, &u8u16k, &delK, &p2sk}, {s2pInstance, u8u16Instance, delInstance, p2sInstance});
     320    if (segmentPipelineParallel){
     321        generateSegmentParallelPipeline(iBuilder, {&s2pk, &u8u16k, &delK, &p2sk, &stdoutK}, {s2pInstance, u8u16Instance, delInstance, p2sInstance, stdoutInstance}, fileSize);
    317322    }
    318323    else{
    319         generatePipelineLoop(iBuilder, {&s2pk, &u8u16k, &delK, &p2sk}, {s2pInstance, u8u16Instance, delInstance, p2sInstance}, fileSize);
     324        generatePipelineLoop(iBuilder, {&s2pk, &u8u16k, &delK, &p2sk, &stdoutK}, {s2pInstance, u8u16Instance, delInstance, p2sInstance, stdoutInstance}, fileSize);
    320325    }
    321326
Note: See TracChangeset for help on using the changeset viewer.