Changeset 6252


Ignore:
Timestamp:
Dec 22, 2018, 6:49:06 PM (3 months ago)
Author:
nmedfort
Message:

Bug fix for consumer information + slight simplification of copyback space calculation

Location:
icGREP/icgrep-devel/icgrep
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/block_kernel.cpp

    r6249 r6252  
    6464    BasicBlock * const segmentDone = b->CreateBasicBlock(getName() + "_segmentDone");
    6565
    66     ConstantInt * const ZERO = b->getSize(0);
    67 
    6866    b->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, mStrideLoopBody);
    6967
     
    7775    }
    7876    mStrideBlockIndex = b->CreatePHI(b->getSizeTy(), 2);
    79     mStrideBlockIndex->addIncoming(ZERO, entryBlock);
     77    mStrideBlockIndex->addIncoming(b->getSize(0), entryBlock);
    8078
    8179    /// GENERATE DO BLOCK METHOD
     
    8583    Value * const nextStrideBlockIndex = b->CreateAdd(mStrideBlockIndex, b->getSize(1));
    8684    Value * noMore = b->CreateICmpEQ(nextStrideBlockIndex, numOfBlocks);
    87     if (hasAttribute(AttrId::CanTerminateEarly) ||  hasAttribute(AttrId::MustExplicitlyTerminate)) {
     85    if (canSetTerminateSignal()) {
    8886        noMore = b->CreateOr(noMore, b->getTerminationSignal());
    8987    }
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r6250 r6252  
    174174    PabloAST * const LF_VT_FF_CR = ccc->compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
    175175    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
    176    
     176
    177177    // Remove the CR of any CR+LF
    178178    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
     
    185185    crb.createAssign(LineBreak, removedCRLF);
    186186
    187    
     187
    188188    Zeroes * const ZEROES = pb.createZeroes();
    189189    PabloAST * const u8pfx = ccc->compileCC(makeByte(0xC0, 0xFF));
     
    200200    PabloAST * const u8pfx4 = ccc->compileCC(makeByte(0xF0, 0xF4), it);
    201201    PabloAST * const u8suffix = ccc->compileCC("u8suffix", makeByte(0x80, 0xBF), it);
    202    
     202
    203203    //
    204204    // Two-byte sequences
     
    212212
    213213    //
    214     // Three-byte sequences   
     214    // Three-byte sequences
    215215    Var * const EF_invalid = it.createVar("EF_invalid", ZEROES);
    216216    auto it3 = it.createScope();
     
    244244    PabloAST * const FX_invalid = it4.createOr(F0_invalid, F4_invalid);
    245245    it4.createAssign(EF_invalid, it4.createOr(EF_invalid, FX_invalid));
    246    
     246
    247247    //
    248248    // Invalid cases
     
    259259    it.createAssign(nonFinal, it.createAnd(nonFinal, u8valid));
    260260    pb.createAssign(nonFinal, pb.createOr(nonFinal, CRLF));
    261     //PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
    262    
     261
    263262    Var * const required = getOutputStreamVar("nonFinal");
    264263    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), nonFinal);
    265     pb.createAssign(pb.createExtract(getOutputStreamVar("UnicodeLB"), pb.getInteger(0)), LineBreak);//pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
     264    pb.createAssign(pb.createExtract(getOutputStreamVar("UnicodeLB"), pb.getInteger(0)), LineBreak);
    266265}
    267266
     
    280279    PabloBuilder pb(getEntryScope());
    281280    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
    282    
     281
    283282    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF, &cc::UTF16));    //u16hi_hi_surrogate = [\xD8-\xDB]
    284283    PabloAST * u16hi_lo_surrogate = ccc.compileCC(makeCC(0xDC00, 0xDFFF, &cc::UTF16));    //u16hi_lo_surrogate = [\xDC-\xDF]
    285    
     284
    286285    PabloAST * invalidTemp = pb.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
    287286    PabloAST * u16invalid = pb.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
     
    305304
    306305RequiredStreams_UTF16::RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb)
    307 : PabloKernel(kb, "RequiredStreams_UTF16",               
     306: PabloKernel(kb, "RequiredStreams_UTF16",
    308307// inputs
    309308{Binding{kb->getStreamSetTy(8), "basis"}},
     
    404403        PabloBlock * scope1 = getEntryScope()->createScope();
    405404        pb.createIf(prefixMatches, scope1);
    406        
     405
    407406        PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
    408407        PabloAST * nybbles[2];
    409408        nybbles[0] = scope1->createPackL(scope1->getInteger(8), u8bytes);
    410409        nybbles[1] = scope1->createPackH(scope1->getInteger(8), u8bytes);
    411        
     410
    412411        PabloAST * bitpairs[4];
    413412        for (unsigned i = 0; i < 2; i++) {
     
    415414            bitpairs[2*i + 1] = scope1->createPackH(scope1->getInteger(4), nybbles[i]);
    416415        }
    417        
     416
    418417        std::vector<PabloAST *> basis(8);
    419418        for (unsigned i = 0; i < 4; i++) {
     
    421420            basis[2*i + 1] = scope1->createPackH(scope1->getInteger(2), bitpairs[i]);
    422421        }
    423        
     422
    424423        cc::Parabix_CC_Compiler ccc(scope1, basis);
    425424        RE_Compiler re_compiler(scope1, ccc);
     
    458457// output
    459458{Binding{"matches", matches, FixedRate(), Add1()}}) {
    460    
     459
    461460}
    462461
     
    482481    PabloBlock * scope1 = getEntryScope()->createScope();
    483482    pb.createIf(prefixMatches, scope1);
    484    
     483
    485484    PabloAST * nybbles[2];
    486485    nybbles[0] = scope1->createPackL(scope1->getInteger(8), u8bytes);
    487486    nybbles[1] = scope1->createPackH(scope1->getInteger(8), u8bytes);
    488    
     487
    489488    PabloAST * bitpairs[4];
    490489    for (unsigned i = 0; i < 2; i++) {
     
    492491        bitpairs[2*i + 1] = scope1->createPackH(scope1->getInteger(4), nybbles[i]);
    493492    }
    494    
     493
    495494    std::vector<PabloAST *> basis(8);
    496495    for (unsigned i = 0; i < 4; i++) {
     
    498497        basis[2*i + 1] = scope1->createPackH(scope1->getInteger(2), bitpairs[i]);
    499498    }
    500    
     499
    501500    cc::Parabix_CC_Compiler ccc(scope1, basis);
    502501    RE_Compiler re_compiler(scope1, ccc);
     
    553552    const auto toCount = pb->createExtract(getInputStreamVar("toCount"), pb->getInteger(0));
    554553    pablo::Var * countResult = getOutputScalarVar("countResult");
    555    
     554
    556555    pb->createAssign(countResult, pb->createCount(pb->createInFile(toCount)));
    557556}
     
    602601    // If we're in the final block bypass the fast loop.
    603602    b->CreateCondBr(mIsFinal, finalStride, strideLoop);
    604    
     603
    605604    b->SetInsertPoint(strideLoop);
    606605    PHINode * const baseBlockIndex = b->CreatePHI(b->getSizeTy(), 2);
     
    619618    blocksRemaining->addIncoming(nextRemaining, strideLoop);
    620619    b->CreateCondBr(b->CreateICmpUGT(nextRemaining, ConstantInt::getNullValue(blocksRemaining->getType())), strideLoop, stridesDone);
    621    
     620
    622621    b->SetInsertPoint(stridesDone);
    623622    // Combine the 8 blockMin values.
     
    630629    blockMin[0] = b->CreateSelect(b->CreateICmpULT(blockMin[0], blockMin[1]), blockMin[0], blockMin[1]);
    631630    Value * anyNull = b->bitblock_any(b->simd_eq(8, blockMin[0], b->allZeroes()));
    632    
     631
    633632    b->CreateCondBr(anyNull, nullByteDetection, segmentDone);
    634    
    635    
     633
     634
    636635    b->SetInsertPoint(finalStride);
    637636    b->CreateMemCpy(b->CreatePointerCast(outputStreamBasePtr, voidPtrTy), b->CreatePointerCast(byteStreamBasePtr, voidPtrTy), itemsToDo, 1);
    638637    b->CreateBr(nullByteDetection);
    639    
     638
    640639    b->SetInsertPoint(nullByteDetection);
    641640    //  Find the exact location using memchr, which should be fast enough.
     
    644643    Value * ptrAddr = b->CreatePtrToInt(ptrToNull, intPtrTy);
    645644    b->CreateCondBr(b->CreateICmpEQ(ptrAddr, ConstantInt::getNullValue(intPtrTy)), segmentDone, nullByteFound);
    646    
     645
    647646    // A null byte has been located; set the termination code and call the signal handler.
    648647    b->SetInsertPoint(nullByteFound);
     
    653652    b->CreateCall(dispatcher, {handler, ConstantInt::get(b->getInt32Ty(), static_cast<unsigned>(grep::GrepSignal::BinaryFile))});
    654653    b->CreateBr(segmentDone);
    655    
     654
    656655    b->SetInsertPoint(segmentDone);
    657656    PHINode * const produced = b->CreatePHI(b->getSizeTy(), 3);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/buffer_management_logic.hpp

    r6249 r6252  
    518518}
    519519
     520#warning TODO: copyback/copyforward ought to reflect exact num of items; not upper bound of space
     521
    520522/** ------------------------------------------------------------------------------------------------------------- *
    521523 * @brief requiresCopyBack
     
    562564    for (unsigned i = 0; i < numOfOutputs; ++i) {
    563565        if (requiresCopyBack(getOutputBufferVertex(i))) {
    564             const Binding & output = mKernel->getOutputStreamSetBinding(i);
    565             const auto prefix = makeBufferName(mKernelIndex, output);
    566             BasicBlock * const copyBack = b->CreateBasicBlock(prefix + "_copyBack", mKernelExit);
    567             BasicBlock * const copyExit = b->CreateBasicBlock(prefix + "_copyBackExit", mKernelExit);
    568566            const StreamSetBuffer * const buffer = getOutputBuffer(i);
    569567            Value * const capacity = buffer->getCapacity(b.get());
     
    574572            Value * const wroteToOverflow = b->CreateICmpULT(producedOffset, priorOffset);
    575573            Value * const needsCopyBack = b->CreateAnd(nonCapacityAlignedWrite, wroteToOverflow);
    576             b->CreateUnlikelyCondBr(needsCopyBack, copyBack, copyExit);
    577 
    578             b->SetInsertPoint(copyBack);
    579             #ifdef PRINT_DEBUG_MESSAGES
    580             b->CallPrintInt(prefix + "_CopyBack", producedOffset);
    581             #endif
    582             writeOverflowCopy(b, buffer, OverflowCopy::Backwards, producedOffset);
    583             b->CreateBr(copyExit);
    584 
    585             b->SetInsertPoint(copyExit);
     574            const Binding & output = mKernel->getOutputStreamSetBinding(i);
     575            writeOverflowCopy(b, OverflowCopy::Backwards, needsCopyBack, output, buffer, producedOffset);
    586576        }
    587577    }
     
    598588        if (requiresFacsimile(getOutputBufferVertex(i))) {
    599589
    600             const Binding & output = mKernel->getOutputStreamSetBinding(i);
    601             const auto prefix = makeBufferName(mKernelIndex, output);
    602             BasicBlock * const copyForward = b->CreateBasicBlock(prefix + "_copyForward", mKernelExit);
    603             BasicBlock * const copyExit = b->CreateBasicBlock(prefix + "_copyForwardExit", mKernelExit);
    604590            const StreamSetBuffer * const buffer = getOutputBuffer(i);
    605591
     
    609595
    610596            // If we wrote anything and it was not our first write to the buffer ...
     597            const Binding & output = mKernel->getOutputStreamSetBinding(i);
    611598            Value * overwroteData = b->CreateICmpUGT(produced, capacity);
    612599            if (LLVM_LIKELY(mKernel->getLowerBound(output) < 1)) {
     
    629616            Value * const needsCopyForward = b->CreateOr(wroteToFirstBlock, wroteFromEndToStart);
    630617
    631 
    632             b->CreateUnlikelyCondBr(needsCopyForward, copyForward, copyExit);
    633 
    634618            // TODO: optimize this further to ensure that we don't copy data that was just copied back from
    635619            // the overflow. Should be enough just to have a "copyback flag" phi node to say that was the
    636620            // last thing it did to the buffer.
    637621
    638             // TODO: look into non-cache-polluting writes? How big does the buffer need to be before it helps?
    639 
    640             b->SetInsertPoint(copyForward);
    641             #ifdef PRINT_DEBUG_MESSAGES
    642             b->CallPrintInt(prefix + "_CopyForward.initialOffset", initialOffset);
    643             b->CallPrintInt(prefix + "_CopyForward.producedOffset", producedOffset);
    644             #endif
    645             writeOverflowCopy(b, buffer, OverflowCopy::Forwards, overflowSize);
    646             b->CreateBr(copyExit);
    647 
    648             b->SetInsertPoint(copyExit);
    649         }
    650     }
    651 }
     622            writeOverflowCopy(b, OverflowCopy::Forwards, needsCopyForward, output, buffer, overflowSize);
     623        }
     624    }
     625}
     626
     627
    652628
    653629/** ------------------------------------------------------------------------------------------------------------- *
    654630 * @brief writeOverflowCopy
    655631 ** ------------------------------------------------------------------------------------------------------------- */
    656 Value * PipelineCompiler::writeOverflowCopy(BuilderRef b, const StreamSetBuffer * const buffer, const OverflowCopy direction, Value * const itemsToCopy) const {
     632void PipelineCompiler::writeOverflowCopy(BuilderRef b, const OverflowCopy direction, Value * cond, const Binding & binding, const StreamSetBuffer * const buffer, Value * const itemsToCopy) const {
     633
     634    const auto prefix = makeBufferName(mKernelIndex, binding)
     635        + ((direction == OverflowCopy::Forwards) ? "_copyForward" : "_copyBack");
     636
     637    BasicBlock * const copyLoop = b->CreateBasicBlock(prefix + "Loop", mKernelExit);
     638    BasicBlock * const copyExit = b->CreateBasicBlock(prefix + "Exit", mKernelExit);
     639
    657640    Value * const count = buffer->getStreamSetCount(b.get());
    658641    Value * blocksToCopy = b->CreateMul(itemsToCopy, count);
     
    664647        blocksToCopy = b->CreateMul(blocksToCopy, b->getSize(itemWidth / blockWidth));
    665648    }
    666     const auto bytesPerBlock = blockWidth / 8;
    667     Value * const bytesToCopy = b->CreateMul(blocksToCopy, b->getSize(bytesPerBlock));
    668649    Value * const base = buffer->getBaseAddress(b.get());
    669650    Value * const overflow = buffer->getOverflowAddress(b.get());
    670651    Value * const source = (direction == OverflowCopy::Forwards) ? base : overflow;
    671652    Value * const target = (direction == OverflowCopy::Forwards) ? overflow : base;
    672     b->CreateMemCpy(target, source, bytesToCopy, bytesPerBlock);
    673     return bytesToCopy;
     653
     654    BasicBlock * const entryBlock = b->GetInsertBlock();
     655    b->CreateUnlikelyCondBr(cond, copyLoop, copyExit);
     656
     657    b->SetInsertPoint(copyLoop);
     658    PHINode * const index = b->CreatePHI(b->getSizeTy(), 2);
     659    index->addIncoming(b->getSize(0), entryBlock);
     660    Value * const val = b->CreateBlockAlignedLoad(b->CreateGEP(source, index));
     661    b->CreateBlockAlignedStore(val, b->CreateGEP(target, index));
     662    Value * const nextIndex = b->CreateAdd(index, b->getSize(1));
     663    index->addIncoming(nextIndex, b->GetInsertBlock());
     664    Value * const notDone =b->CreateICmpNE(nextIndex, blocksToCopy);
     665    b->CreateCondBr(notDone, copyLoop, copyExit);
     666
     667    b->SetInsertPoint(copyExit);
    674668}
    675669
     
    699693 * Returns the address of the "zeroth" item of the (logically-unbounded) stream set.
    700694 ** ------------------------------------------------------------------------------------------------------------- */
    701 inline Value * PipelineCompiler::calculateLogicalBaseAddress(BuilderRef b, const Binding & binding, const StreamSetBuffer * const buffer, Value * const itemCount) {
     695Value * PipelineCompiler::calculateLogicalBaseAddress(BuilderRef b, const Binding & binding, const StreamSetBuffer * const buffer, Value * const itemCount) {
    702696    Constant * const LOG_2_BLOCK_WIDTH = b->getSize(floor_log2(b->getBitBlockWidth()));
    703697    Constant * const ZERO = b->getSize(0);
     
    713707        Value * const A0 = buffer->getStreamBlockPtr(b.get(), ZERO, blockIndex);
    714708        Value * const B0 = tmp.getStreamBlockPtr(b.get(), ZERO, blockIndex);
    715         Value * const B1 = b->CreatePointerCast(B0, A0->getType());
    716         b->CreateAssert(b->CreateICmpEQ(A0, B1), prefix + ": logical base address is incorrect");
     709        Value * const C0 = b->CreatePointerCast(B0, A0->getType());
     710        b->CreateAssert(b->CreateICmpEQ(A0, C0), prefix + ": logical base address is incorrect");
     711        Value * upToIndex = b->CreateAdd(blockIndex, b->CreateSub(mNumOfLinearStrides, b->getSize(1)));
     712        upToIndex = b->CreateSelect(b->CreateICmpEQ(mNumOfLinearStrides, ZERO), blockIndex, upToIndex);
     713        Value * const A1 = buffer->getStreamBlockPtr(b.get(), ZERO, upToIndex);
     714        Value * const B1 = tmp.getStreamBlockPtr(b.get(), ZERO, upToIndex);
     715        Value * const C1 = b->CreatePointerCast(B1, A1->getType());
     716        b->CreateAssert(b->CreateICmpEQ(A1, C1), prefix + ": logical base address is incorrect");
    717717    }
    718718    return address;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

    r6249 r6252  
    11#include "pipeline_compiler.hpp"
    22
    3 const static std::string TERMINATION_SIGNAL = "terminationSignal";
     3const static std::string TERMINATION_SIGNAL_SUFFIX = "terminationSignal";
    44
    55namespace kernel {
     
    1313    b->setKernel(mPipelineKernel);
    1414    for (unsigned i = 0; i < numOfKernels; ++i) {
     15        addBufferHandlesToPipelineKernel(b, i);
    1516        addInternalKernelProperties(b, i);
    16         addBufferHandlesToPipelineKernel(b, i);
    1717        addPopCountScalarsToPipelineKernel(b, i);
    1818    }
    1919    b->setKernel(mPipelineKernel);
    2020}
    21 
    22 //const static std::string PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";
    23 //const static std::string PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";
    24 // const static std::string NON_DEFERRED_ITEM_COUNT_SUFFIX = "_nonDeferredItemCount";
    25 // const static std::string LOGICAL_SEGMENT_NO_SCALAR = "segmentNo";
    2621
    2722/** ------------------------------------------------------------------------------------------------------------- *
     
    3429    const auto name = makeKernelName(kernelIndex);
    3530    // TODO: prove two termination signals can be fused into a single counter?
    36     mPipelineKernel->addInternalScalar(sizeTy, name + TERMINATION_SIGNAL);
     31    mPipelineKernel->addInternalScalar(sizeTy, name + TERMINATION_SIGNAL_SUFFIX);
    3732    mPipelineKernel->addInternalScalar(sizeTy, name + LOGICAL_SEGMENT_SUFFIX);
    3833
     
    185180        BasicBlock * const enteringFinalStride = b->CreateBasicBlock(prefix + "_finalStride", mKernelLoopCall);
    186181
     182        mNumOfLinearStrides = b->CreateUMin(mNumOfLinearStrides, b->getSize(1));
     183
    187184        isFinal = b->CreateICmpEQ(mNumOfLinearStrides, b->getSize(0));
    188185
     
    206203
    207204    } else {
     205
    208206        mNumOfLinearStrides = b->getSize(1);
    209207        calculateNonFinalItemCounts(b);
    210208        b->CreateBr(mKernelLoopCall);
     209
    211210    }
    212211
     
    319318        const auto bufferVertex = source(e, mBufferGraph);
    320319        const BufferNode & bn = mBufferGraph[bufferVertex];
    321         const StreamSetBuffer * const buffer = bn.Buffer;
     320        const ExternalBuffer * const buffer = cast<ExternalBuffer>(bn.Buffer);
    322321
    323322        Value * const produced = bn.TotalItems; assert (produced);
    324323        Value * const consumed = b->getSize(0);  assert (consumed);
    325         Value * const writable = buffer->getLinearlyWritableItems(b, produced, consumed, getCopyBack(bufferVertex));
    326 
    327 //        const auto kernelVertex = parent(bufferVertex, mBufferGraph);
     324        Value * const writable = buffer->getLinearlyWritableItems(b, produced, consumed);
     325
    328326        const BufferRateData & rd = mBufferGraph[e];
    329327        const auto outputPort = rd.Port;
     
    605603    b->setKernel(mPipelineKernel);
    606604    const auto prefix = makeKernelName(mKernelIndex);
    607     Value * const terminated = b->getScalarField(prefix + TERMINATION_SIGNAL);
     605    Value * const terminated = b->getScalarField(prefix + TERMINATION_SIGNAL_SUFFIX);
    608606    b->setKernel(mKernel);
    609607    return b->CreateICmpNE(terminated, b->getSize(0));
     
    616614    const auto prefix = makeKernelName(mKernelIndex);
    617615    b->setKernel(mPipelineKernel);
    618     b->setScalarField(prefix + TERMINATION_SIGNAL, b->CreateZExtOrTrunc(value, b->getSizeTy()));
     616    b->setScalarField(prefix + TERMINATION_SIGNAL_SUFFIX, b->CreateZExtOrTrunc(value, b->getSizeTy()));
    619617    #ifdef PRINT_DEBUG_MESSAGES
    620618    b->CallPrintInt("*** " + prefix + "_terminated ***", value);
     
    635633    const auto numOfInputs = mKernel->getNumOfStreamInputs();
    636634    for (unsigned i = 0; i < numOfInputs; ++i) {
    637         // TODO: set these to the total produced item count for that input?
    638         mUpdatedProcessedPhi[i]->addIncoming(mFinalProcessedPhi[i], exitBlock);
     635        Value * const totalCount = getTotalItemCount(b, i);
     636        mUpdatedProcessedPhi[i]->addIncoming(totalCount, exitBlock);
    639637        if (mUpdatedProcessedDeferredPhi[i]) {
    640             mUpdatedProcessedDeferredPhi[i]->addIncoming(mFinalProcessedPhi[i], exitBlock);
     638            mUpdatedProcessedDeferredPhi[i]->addIncoming(totalCount, exitBlock);
    641639        }
    642640    }
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/kernel_logic.hpp

    r6249 r6252  
    6262    #endif
    6363    Value * const hasEnough = b->CreateICmpUGE(accessible, requiredInput);
    64     Value * const hasTerminated = hasProducerTerminated(b, inputPort);
     64    Value * const hasTerminated = producerTerminated(inputPort);
    6565    Value * const sufficientInput = b->CreateOr(hasEnough, hasTerminated);
    6666    mAccessibleInputItems[inputPort] = accessible;
     
    7070
    7171/** ------------------------------------------------------------------------------------------------------------- *
    72  * @brief hasProducerTerminated
    73  ** ------------------------------------------------------------------------------------------------------------- */
    74 inline Value * PipelineCompiler::hasProducerTerminated(BuilderRef /* b */, const unsigned inputPort) const {
     72 * @brief producerTerminated
     73 ** ------------------------------------------------------------------------------------------------------------- */
     74inline Value * PipelineCompiler::producerTerminated(const unsigned inputPort) const {
    7575    const auto bufferVertex = getInputBufferVertex(inputPort);
    7676    const auto producerVertex = parent(bufferVertex, mBufferGraph);
     
    9999                        ": processed count exceeds total count");
    100100    }
    101     const auto overflow = getFacsimile(getInputBufferVertex(inputPort));
    102     Value * const accessible = buffer->getLinearlyAccessibleItems(b, processed, totalItems, overflow);
     101    ConstantInt * const facsimile = b->getSize(getFacsimile(getInputBufferVertex(inputPort)));
     102    Value * const accessible = buffer->getLinearlyAccessibleItems(b, processed, totalItems, facsimile);
    103103    #ifdef PRINT_DEBUG_MESSAGES
    104104    b->CallPrintInt(prefix + "_accessible", accessible);
     
    122122        #endif
    123123        Value * const hasEnough = b->CreateICmpULE(strideLength, writable, prefix + "_hasEnough");
    124         Value * const check = b->CreateAnd(hasEnough, willNotOverwriteOverflow(b, outputPort));
    125124        BasicBlock * const target = b->CreateBasicBlock(prefix + "_hasOutputSpace", mKernelLoopCall);
    126125        mWritableOutputItems[outputPort] = writable;
    127126        if (LLVM_UNLIKELY(isa<DynamicBuffer>(buffer))) {
    128             expandOutputBuffer(b, outputPort, check, target);
     127            expandOutputBuffer(b, outputPort, hasEnough, target);
    129128        } else {
    130             branchToTargetOrLoopExit(b, check, target);
    131         }
    132     }
    133 }
    134 
    135 /** ------------------------------------------------------------------------------------------------------------- *
    136  * @brief willNotOverwriteOverflow
    137  *
    138  * check whether the potential overflow copy will overwrite the buffer
    139  ** ------------------------------------------------------------------------------------------------------------- */
    140 inline Value * PipelineCompiler::willNotOverwriteOverflow(BuilderRef b, const unsigned outputPort) {
    141     if (LLVM_UNLIKELY(requiresCopyBack(getOutputBufferVertex(outputPort)))) {
    142         Value * const produced = mAlreadyProducedPhi[outputPort];
    143         Value * const consumed = getConsumedItemCount(b, outputPort);
    144         Value * const unconsumed = b->CreateSub(produced, consumed);
    145         Value * const strideLength = getOutputStrideLength(b, outputPort);
    146         Value * const required = b->CreateAdd(unconsumed, strideLength);
    147         const StreamSetBuffer * const buffer = getOutputBuffer(outputPort);
    148         Value * const capacity = buffer->getCapacity(b.get());
    149         const Binding & output = mKernel->getOutputStreamSetBinding(outputPort);
    150         const auto prefix = makeBufferName(mKernelIndex, output);
    151         return b->CreateICmpULT(required, capacity, prefix + "_noOverflowOverwrite");
    152     } else {
    153         return b->getTrue();
     129            branchToTargetOrLoopExit(b, hasEnough, target);
     130        }
    154131    }
    155132}
     
    200177                        ": consumed count exceeds produced count");
    201178    }
    202     const auto overflow = getCopyBack(getOutputBufferVertex(outputPort));
    203     Value * const writable = buffer->getLinearlyWritableItems(b, produced, consumed, overflow);
     179    ConstantInt * const copyBack = b->getSize(getCopyBack(getOutputBufferVertex(outputPort)));
     180    Value * const writable = buffer->getLinearlyWritableItems(b, produced, consumed, copyBack);
    204181    #ifdef PRINT_DEBUG_MESSAGES
    205182    b->CallPrintInt(prefix + "_writable", writable);
     
    244221    #endif
    245222    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    246         Value * const term = hasProducerTerminated(b, inputPort);
     223        Value * const term = producerTerminated(inputPort);
    247224        Value * const work = b->CreateIsNotNull(numOfStrides);
    248225        Value * const progress = b->CreateOr(work, term);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_analysis.hpp

    r6228 r6252  
    229229}
    230230
     231/** ------------------------------------------------------------------------------------------------------------- *
     232 * @brief maximumConsumed — returns upperBound(kernel, binding), increased by the binding's lookahead when it has one
     233 ** ------------------------------------------------------------------------------------------------------------- */
     234inline LLVM_READNONE RateValue maximumConsumed(const Kernel * const kernel, const Binding & binding) {
     235    auto ub = upperBound(kernel, binding);  // base value; upperBound is defined outside this view
     236    if (binding.hasLookahead()) {
     237        ub += binding.getLookahead();  // lookahead is added on top of the upper bound
     238    }
     239    return ub;
     240}
     241
    231242}
    232243/** ------------------------------------------------------------------------------------------------------------- *
     
    242253    const auto firstBuffer = numOfKernels + 1;
    243254
     255#if 0
     256
    244257    #warning TODO: ConsumerGraph assumes the dataflow is transitively bounded by the same initial source
    245258
    246259    #warning REVISIT: ConsumerGraph is not optimal for handling relative rate inputs
    247260
    248     std::vector<std::pair<unsigned, unsigned>> consumers; // kernel, portIndex
     261    struct ConsumerData {
     262        unsigned Kernel{0};
     263        unsigned Port{0};
     264        RateValue Minimum{0};
     265        RateValue Maximum{0};
     266
     267        inline bool operator < (const ConsumerData & other) const {
     268            return (Kernel < other.Kernel) || (Port < other.Port);
     269        }
     270    };
     271
     272    std::vector<ConsumerData> consumers;
     273#endif
    249274
    250275    for (auto bufferVertex = firstBuffer; bufferVertex < lastBuffer; ++bufferVertex) {
     
    260285        add_edge(source(pe, mBufferGraph), bufferVertex, mBufferGraph[pe].Port, G);
    261286
     287        for (const auto ce : make_iterator_range(out_edges(bufferVertex, mBufferGraph))) {
     288            add_edge(bufferVertex, target(ce, mBufferGraph), mBufferGraph[ce].Port, G);
     289        }
     290
     291#if 0
     292
    262293        // collect the consumers of the i-th buffer
    263294        consumers.clear();
    264295        for (const auto e : make_iterator_range(out_edges(bufferVertex, mBufferGraph))) {
    265             consumers.emplace_back(target(e, mBufferGraph), mBufferGraph[e].Port);
     296            ConsumerData cd;
     297            cd.Kernel = target(e, mBufferGraph);
     298            const BufferNode & bn = mBufferGraph[cd.Kernel];
     299            const BufferRateData & rd = mBufferGraph[e];
     300            cd.Port = rd.Port;
     301            const Kernel * const kernel = mPipeline[cd.Kernel];
     302            const Binding & input = kernel->getInputStreamSetBinding(cd.Port);
     303            if (LLVM_UNLIKELY(input.hasAttribute(AttrId::Deferred))) {
     304                cd.Minimum = RateValue{0};
     305            } else {
     306                cd.Minimum = bn.Lower * rd.Minimum;
     307            }
     308            cd.Maximum = bn.Upper * rd.Maximum;
     309            if (LLVM_UNLIKELY(input.hasLookahead())) {
     310                cd.Maximum += input.getLookahead();
     311            }
     312            consumers.emplace_back(cd);
    266313        }
    267314
     
    271318        // a "fake" edge to mark the last consumer otherwise we'll set it too soon.
    272319
     320        // NOTE: here we need to consider the impact of lookahead on the use of a buffer since it may
     321        // limit how much work we can perform when nearing the end of the buffer.
     322
     323        // TODO: this takes too narrow of a view of the problem. By considering a buffer's consumers
     324        // in isolation, it does not take into account that a particular kernel may be executed fewer
     325        // times than another because of I/O constraints independent of the buffer we're considering.
     326        // Essentially, to make this optimization safe we need to prove that if a consumer has performed
     327        // k strides, all other consumers performed k.
     328
    273329        if (LLVM_LIKELY(consumers.size() > 1)) {
     330
    274331            std::sort(consumers.begin(), consumers.end());
    275332
     
    278335            for (auto j = consumers.begin() + 1; j != consumers.end(); ) {
    279336
    280                 const Kernel * const kernel_j = mPipeline[j->first];
    281                 const Binding & input_j = kernel_j->getInputStreamSetBinding(j->second);
    282                 const auto lb_j = minimumConsumed(kernel_j, input_j);
    283 
     337                const ConsumerData & Cj = *j;
    284338                for (auto k = consumers.begin(); k != j; ++k) {
    285                     const Kernel * const kernel_k = mPipeline[k->first];
    286                     const Binding & input_k = kernel_k->getInputStreamSetBinding(k->second);
    287                     const auto ub_k = upperBound(kernel_k, input_k);
    288                     if (LLVM_UNLIKELY(lb_j >= ub_k)) {
     339                    const ConsumerData & Ck = *k;
     340                    if (LLVM_UNLIKELY(Cj.Minimum >= Ck.Maximum)) {
    289341                        j = consumers.erase(j);
    290342                        goto next;
     
    293345
    294346                for (auto k = j + 1; k != consumers.end(); ++k) {
    295                     const Kernel * const kernel_k = mPipeline[k->first];
    296                     const Binding & input_k = kernel_k->getInputStreamSetBinding(k->second);
    297                     const auto ub_k = upperBound(kernel_k, input_k);
    298                     if (LLVM_UNLIKELY(lb_j >= ub_k)) {
     347                    const ConsumerData & Ck = *k;
     348                    if (LLVM_UNLIKELY(Cj.Minimum >= Ck.Maximum)) {
    299349                        j = consumers.erase(j);
    300350                        goto next;
     
    313363            add_edge(bufferVertex, consumer.first, consumer.second, G);
    314364        }
     365#endif
     366
    315367    }
    316368
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_compiler.hpp

    r6249 r6252  
    1818#include <queue>
    1919
    20 //#define PRINT_DEBUG_MESSAGES
     20// #define PRINT_DEBUG_MESSAGES
    2121
    2222using namespace boost;
     
    227227    void writeCopyForwardLogic(BuilderRef b);
    228228    enum class OverflowCopy { Forwards, Backwards };
    229     Value * writeOverflowCopy(BuilderRef b, const StreamSetBuffer * const buffer, const OverflowCopy direction, Value * const itemsToCopy) const;
     229    void writeOverflowCopy(BuilderRef b, const OverflowCopy direction, Value * cond, const Binding & binding, const StreamSetBuffer * const buffer, Value * const itemsToCopy) const;
    230230
    231231
     
    264264    Value * getWritableOutputItems(BuilderRef b, const unsigned outputPort);
    265265    Value * calculateBufferExpansionSize(BuilderRef b, const unsigned outputPort);
    266     Value * willNotOverwriteOverflow(BuilderRef b, const unsigned outputPort);
    267266    Value * addLookahead(BuilderRef b, const unsigned inputPort, Value * itemCount) const;
    268267    Value * subtractLookahead(BuilderRef b, const unsigned inputPort, Value * itemCount) const;
     
    270269    Value * truncateBlockSize(BuilderRef b, const Binding & binding, Value * itemCount, Value * all) const;
    271270    Value * getTotalItemCount(BuilderRef b, const unsigned inputPort) const;
    272     Value * hasProducerTerminated(BuilderRef b, const unsigned inputPort) const;
     271    Value * producerTerminated(const unsigned inputPort) const;
    273272    Value * initiallyTerminated(BuilderRef b) const;
    274273    void setTerminated(BuilderRef b, Value * const terminated);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/popcount_logic.hpp

    r6249 r6252  
    2828
    2929    forEachOutputBufferThatIsAPopCountReference(mKernelIndex, [&](const unsigned bufferVertex) {
    30 
    31         // TODO: if we store the partial sum, we can save computation costs when
    32         // a particular reference is shared between multiple kernels. However,
    33         // unless we prove that every kernel that shares this buffer progresses
    34         // at the same rate, using the partial sum becomes more complicated as
    35         // we need to
    3630
    3731        const auto bufferPort = mBufferGraph[in_edge(bufferVertex, mBufferGraph)].Port;
     
    251245            Value * const total = getTotalItemCount(b, refPortNum);
    252246            Value * const strideLength = getInputStrideLength(b, refPortNum);
    253             Value * const term = hasProducerTerminated(b, refPortNum);
     247            Value * const term = producerTerminated(refPortNum);
    254248            Value * const strideLengthMinus1 = b->CreateSub(strideLength, ONE);
    255249            Value * const padding = b->CreateSelect(term, strideLengthMinus1, b->getSize(0));
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r6241 r6252  
    7070}
    7171
     72inline Value * StreamSetBuffer::addOverflow(const std::unique_ptr<kernel::KernelBuilder> & b, Value * capacity, Value * const overflowItems) const {
     73    if (overflowItems) {
     74        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     75            Value * const overflowCapacity = b->getSize(getOverflowCapacity(b));
     76            Value * const valid = b->CreateICmpULE(overflowItems, overflowCapacity);
     77            b->CreateAssert(valid, "overflow items exceeds overflow capacity");
     78        }
     79        capacity = b->CreateAdd(capacity, overflowItems);
     80    }
     81    return capacity;
     82}
     83
    7284// External File Buffer
    7385Type * ExternalBuffer::getHandleType(const std::unique_ptr<kernel::KernelBuilder> & b) const {
     
    120132}
    121133
    122 Value * ExternalBuffer::getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const totalItems, const unsigned /* overflowSize */) const {
     134Value * ExternalBuffer::getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const totalItems, Value * /* overflowItems */) const {
    123135    return b->CreateSub(totalItems, fromPosition);
    124136}
    125137
    126 Value * ExternalBuffer::getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const /* consumed */, const unsigned /* overflowSize */) const {
     138Value * ExternalBuffer::getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const /* consumed */, Value * /* overflowItems */) const {
    127139    assert (fromPosition);
    128140    Value * const capacity = getCapacity(b.get());
     
    169181
    170182Value * ExternalBuffer::getStreamBlockPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex) const {
    171     //assertValidBlockIndex(b, blockIndex);
     183    assertValidBlockIndex(b, blockIndex);
    172184    return StreamSetBuffer::getStreamBlockPtr(b, streamIndex, blockIndex);
    173185}
    174186
    175187Value * ExternalBuffer::getStreamPackPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex, Value * const packIndex) const {
    176     //assertValidBlockIndex(b, blockIndex);
     188    assertValidBlockIndex(b, blockIndex);
    177189    return StreamSetBuffer::getStreamPackPtr(b, streamIndex, blockIndex, packIndex);
    178190}
     
    273285}
    274286
    275 Value * StaticBuffer::getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const totalItems, const unsigned overflowSize) const {
     287Value * StaticBuffer::getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const totalItems, Value * overflowItems) const {
    276288    Value * const capacity = getCapacity(b.get());
    277289    Value * const availableItems = b->CreateSub(totalItems, fromPosition);
    278290    Value * const fromOffset = b->CreateURem(fromPosition, capacity);
    279     Value * capacityWithOverflow = capacity;
    280     assert (overflowSize <= getOverflowCapacity(b));
    281     if (overflowSize) {
    282         capacityWithOverflow = b->CreateAdd(capacity, b->getSize(overflowSize - 1));
    283     }
     291    Value * const capacityWithOverflow = addOverflow(b, capacity, overflowItems);
    284292    Value * const linearSpace = b->CreateSub(capacityWithOverflow, fromOffset);
    285293    return b->CreateUMin(availableItems, linearSpace);
    286294}
    287295
    288 Value * StaticBuffer::getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const consumedItems, const unsigned overflowSize) const {
     296Value * StaticBuffer::getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const consumedItems, Value * overflowItems) const {
    289297    Value * const capacity = getCapacity(b.get());
    290298    Value * const unconsumedItems = b->CreateSub(fromPosition, consumedItems);
    291     Value * const full = b->CreateICmpUGE(unconsumedItems, capacity); // capacityWithOverflow);
     299    Value * const full = b->CreateICmpUGE(unconsumedItems, capacity);
    292300    Value * const fromOffset = b->CreateURem(fromPosition, capacity);
    293301    Value * const consumedOffset = b->CreateURem(consumedItems, capacity);
    294302    Value * const toEnd = b->CreateICmpULE(consumedOffset, fromOffset);
    295     Value * capacityWithOverflow = capacity;
    296     assert (overflowSize <= getOverflowCapacity(b));
    297     if (overflowSize) {
    298         // NOTE: the -1 is to discourage the pipeline from writing an entire block to the overflow only to copy back to the first block.
    299         capacityWithOverflow = b->CreateAdd(capacity, b->getSize(overflowSize - 1));
    300     }
     303    // limit the overflow so that we do not overwrite our unconsumed data during a copyback
     304    Value * const effectiveOverflow = b->CreateUMin(consumedOffset, overflowItems);
     305    Value * const capacityWithOverflow = addOverflow(b, capacity, effectiveOverflow);
    301306    Value * const limit = b->CreateSelect(toEnd, capacityWithOverflow, consumedOffset);
    302307    Value * const remaining = b->CreateSub(limit, fromOffset);
     
    402407}
    403408
    404 Value * DynamicBuffer::getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> &b, Value * const fromPosition, Value * const totalItems, const unsigned overflowSize) const {
     409Value * DynamicBuffer::getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> &b, Value * const fromPosition, Value * const totalItems, Value * overflowItems) const {
    405410    Value * const capacity = getCapacity(b.get());
    406411    Value * const availableItems = b->CreateSub(totalItems, fromPosition);
    407412    Value * const fromOffset = b->CreateURem(fromPosition, capacity);
    408     Value * capacityWithOverflow = capacity;
    409     assert (overflowSize <= getOverflowCapacity(b));
    410     if (overflowSize) {
    411         capacityWithOverflow = b->CreateAdd(capacity, b->getSize(overflowSize - 1));
    412     }
     413    Value * const capacityWithOverflow = addOverflow(b, capacity, overflowItems);
    413414    Value * const linearSpace = b->CreateSub(capacityWithOverflow, fromOffset);
    414415    return b->CreateUMin(availableItems, linearSpace);
    415416}
    416417
    417 Value * DynamicBuffer::getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const consumedItems, const unsigned overflowSize) const {
     418Value * DynamicBuffer::getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, Value * const fromPosition, Value * const consumedItems, Value * overflowItems) const {
    418419    Value * const capacity = getCapacity(b.get());
    419420    Value * const unconsumedItems = b->CreateSub(fromPosition, consumedItems);
    420     Value * const full = b->CreateICmpUGE(unconsumedItems, capacity); // capacityWithOverflow);
     421    Value * const full = b->CreateICmpUGE(unconsumedItems, capacity);
    421422    Value * const fromOffset = b->CreateURem(fromPosition, capacity);
    422423    Value * const consumedOffset = b->CreateURem(consumedItems, capacity);
    423424    Value * const toEnd = b->CreateICmpULE(consumedOffset, fromOffset);
    424     Value * capacityWithOverflow = capacity;
    425     assert (overflowSize <= getOverflowCapacity(b));
    426     if (overflowSize) {
    427         // NOTE: the -1 is to discourage the pipeline from writing an entire block to the overflow only to copy back to the first block.
    428         capacityWithOverflow = b->CreateAdd(capacity, b->getSize(overflowSize - 1));
    429     }
     425    // limit the overflow so that we do not overwrite our unconsumed data during a copyback
     426    Value * const effectiveOverflow = b->CreateUMin(consumedOffset, overflowItems);
     427    Value * const capacityWithOverflow = addOverflow(b, capacity, effectiveOverflow);
    430428    Value * const limit = b->CreateSelect(toEnd, capacityWithOverflow, consumedOffset);
    431429    Value * const remaining = b->CreateSub(limit, fromOffset);
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r6186 r6252  
    2727        , DynamicBuffer
    2828    };
    29    
     29
    3030    BufferKind getBufferKind() const {
    3131        return mBufferKind;
     
    3535        return mType;
    3636    }
    37    
     37
    3838    llvm::Type * getBaseType() const {
    3939        return mBaseType;
     
    4747        return numStreams;
    4848    }
    49    
     49
    5050    unsigned getAddressSpace() const {
    5151        return mAddressSpace;
     
    7171
    7272    // The number of items that can be linearly accessed from a given logical stream position.
    73     virtual llvm::Value * getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * totalItems, const unsigned overflowSize = 0) const = 0;
    74 
    75     virtual llvm::Value * getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * consumedItems, const unsigned overflowSize = 0) const = 0;
     73    virtual llvm::Value * getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * totalItems, llvm::Value * overflowItems = nullptr) const = 0;
     74
     75    virtual llvm::Value * getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * consumedItems, llvm::Value * overflowItems = nullptr) const = 0;
    7676
    7777    virtual llvm::Type * getHandleType(const std::unique_ptr<kernel::KernelBuilder> & b) const = 0;
     
    9494
    9595    virtual void setCapacity(IDISA::IDISA_Builder * const b, llvm::Value * size) const = 0;
    96    
     96
    9797    virtual llvm::Value * getCapacity(IDISA::IDISA_Builder * const b) const = 0;
    9898
     
    106106
    107107    llvm::Value * getHandle(IDISA::IDISA_Builder * const b) const;
     108
     109    llvm::Value * addOverflow(const std::unique_ptr<KernelBuilder> &b, llvm::Value * capacity, llvm::Value * const overflowItems) const;
    108110
    109111    StreamSetBuffer(const BufferKind k, const std::unique_ptr<KernelBuilder> & b, llvm::Type * baseType, unsigned AddressSpace);
     
    122124    const unsigned                  mAddressSpace;
    123125    llvm::Type * const              mBaseType;
    124 };   
     126};
    125127
    126128class ExternalBuffer final : public StreamSetBuffer {
     
    144146    llvm::Value * getStreamLogicalBasePtr(IDISA::IDISA_Builder * const b, llvm::Value * const streamIndex, llvm::Value * blockIndex) const override;
    145147
    146     llvm::Value * getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * totalItems, const unsigned overflowSize = 0) const override;
    147 
    148     llvm::Value * getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * consumedItems, const unsigned overflowSize = 0) const override;
     148    llvm::Value * getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * totalItems, llvm::Value * overflowItems = nullptr) const override;
     149
     150    llvm::Value * getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * consumedItems, llvm::Value * overflowItems = nullptr) const override;
    149151
    150152    llvm::Type * getHandleType(const std::unique_ptr<kernel::KernelBuilder> & b) const override;
     
    179181        return b->getBufferKind() == BufferKind::StaticBuffer;
    180182    }
    181    
     183
    182184    StaticBuffer(const std::unique_ptr<KernelBuilder> & b, llvm::Type * const type,
    183185                 const size_t capacity, const size_t overflowBlocks = 0, const unsigned AddressSpace = 0);
     
    187189    void releaseBuffer(const std::unique_ptr<KernelBuilder> & b) const override;
    188190
    189     llvm::Value * getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * const totalItems, const unsigned overflowSize = 0) const override;
    190 
    191     llvm::Value * getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const fromPosition, llvm::Value * const consumedItems, const unsigned overflowSize = 0) const override;
     191    llvm::Value * getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * const totalItems, llvm::Value * overflowItems = nullptr) const override;
     192
     193    llvm::Value * getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const fromPosition, llvm::Value * const consumedItems, llvm::Value * overflowItems = nullptr) const override;
    192194
    193195    bool hasOverflow() const override {
     
    227229
    228230};
    229        
     231
    230232class DynamicBuffer final : public StreamSetBuffer {
    231233
     
    236238public:
    237239    static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::DynamicBuffer;}
    238    
     240
    239241    DynamicBuffer(const std::unique_ptr<KernelBuilder> & b, llvm::Type * type, size_t initialCapacity, size_t overflowSize = 0, unsigned AddressSpace = 0);
    240242
     
    245247    // void expandBuffer(const std::unique_ptr<KernelBuilder> & b, llvm::Value * consumed, llvm::Value * produced, llvm::Value * required) const;
    246248
    247     llvm::Value * getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * totalItems, const unsigned overflowSize = 0) const override;
    248 
    249     llvm::Value * getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * consumedItems, const unsigned overflowSize = 0) const override;
    250    
     249    llvm::Value * getLinearlyAccessibleItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * totalItems, llvm::Value * overflowItems = nullptr) const override;
     250
     251    llvm::Value * getLinearlyWritableItems(const std::unique_ptr<KernelBuilder> & b, llvm::Value * fromPosition, llvm::Value * consumedItems, llvm::Value * overflowItems = nullptr) const override;
     252
    251253    bool hasOverflow() const override {
    252254        return mOverflow > 0;
     
    264266
    265267    llvm::Value * getOverflowAddress(IDISA::IDISA_Builder * const b) const override;
    266    
     268
    267269    llvm::Value * getCapacity(IDISA::IDISA_Builder * const b) const override;
    268    
     270
    269271    void setCapacity(IDISA::IDISA_Builder * const b, llvm::Value * capacity) const override;
    270272
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r6220 r6252  
    121121            mKernel->addInternalScalar(stmt->getType(), stmt->getName().str());
    122122        }
    123     }   
     123    }
    124124}
    125125
    126126void PabloCompiler::addBranchCounter(const std::unique_ptr<kernel::KernelBuilder> & b) {
    127     if (CompileOptionIsSet(PabloCompilationFlags::EnableProfiling)) {       
     127    if (CompileOptionIsSet(PabloCompilationFlags::EnableProfiling)) {
    128128        Value * ptr = b->getScalarFieldPtr("profile");
    129129        assert (mBasicBlock.size() < ptr->getType()->getPointerElementType()->getArrayNumElements());
     
    166166    BasicBlock * const ifBodyBlock = b->CreateBasicBlock("if.body_" + std::to_string(mBranchCount));
    167167    BasicBlock * const ifEndBlock = b->CreateBasicBlock("if.end_" + std::to_string(mBranchCount));
    168    
     168
    169169    std::vector<std::pair<const Var *, Value *>> incoming;
    170170
     
    195195
    196196    const PabloBlock * ifBody = ifStatement->getBody();
    197    
     197
    198198    mCarryManager->enterIfScope(b, ifBody);
    199199
     
    202202        condition = b->bitblock_any(mCarryManager->generateSummaryTest(b, condition));
    203203    }
    204    
     204
    205205    b->CreateCondBr(condition, ifBodyBlock, ifEndBlock);
    206    
     206
    207207    // Entry processing is complete, now handle the body of the if.
    208208    b->SetInsertPoint(ifBodyBlock);
     
    502502            value = compileExpression(b, cast<Assign>(stmt)->getValue());
    503503            if (isa<Extract>(expr) || (isa<Var>(expr) && cast<Var>(expr)->isKernelParameter())) {
    504                 Value * const ptr = compileExpression(b, expr, false);               
     504                Value * const ptr = compileExpression(b, expr, false);
    505505                Type * const elemTy = ptr->getType()->getPointerElementType();
    506506                b->CreateAlignedStore(b->CreateZExt(value, elemTy), ptr, getAlignment(elemTy));
     
    527527            PabloAST * stream = l->getExpression();
    528528            Value * index = nullptr;
    529             if (LLVM_UNLIKELY(isa<Extract>(stream))) {               
     529            if (LLVM_UNLIKELY(isa<Extract>(stream))) {
    530530                index = compileExpression(b, cast<Extract>(stream)->getIndex(), true);
    531531                stream = cast<Extract>(stream)->getArray();
     
    640640
    641641Value * PabloCompiler::compileExpression(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloAST * const expr, const bool ensureLoaded) {
    642     const auto f = mMarker.find(expr);   
     642    const auto f = mMarker.find(expr);
    643643    Value * value = nullptr;
    644644    if (LLVM_LIKELY(f != mMarker.end())) {
Note: See TracChangeset for help on using the changeset viewer.