Changeset 6255


Ignore:
Timestamp:
Dec 24, 2018, 2:55:10 PM (3 months ago)
Author:
nmedfort
Message:

Prevent PopCount rate stream from starting within the overflow even when it contains enough items.

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r6228 r6255  
    151151
    152152{Binding{"marker", mask, FixedRate(), Principal()},
    153 Binding{"source", source, PopcountOf("marker")}}, // BlockSize(b->getBitBlockWidth())
     153Binding{"source", source, PopcountOf("marker")}},
    154154{Binding{"output", expanded, FixedRate()}},
    155155{}, {}, {})
     
    194194        pendingDataPhi[i]->addIncoming(pendingData[i], entry);
    195195    }
     196
    196197    Value * deposit_mask = b->loadInputStreamBlock("marker", ZERO, blockNoPhi);
    197198
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/buffer_management_logic.hpp

    r6253 r6255  
    623623        + ((direction == OverflowCopy::Forwards) ? "_copyForward" : "_copyBack");
    624624
     625    BasicBlock * const copyStart = b->CreateBasicBlock(prefix + "Start", mKernelExit);
    625626    BasicBlock * const copyLoop = b->CreateBasicBlock(prefix + "Loop", mKernelExit);
    626627    BasicBlock * const copyExit = b->CreateBasicBlock(prefix + "Exit", mKernelExit);
    627628
     629    b->CreateUnlikelyCondBr(cond, copyStart, copyExit);
     630
     631    b->SetInsertPoint(copyStart);
    628632    Value * const count = buffer->getStreamSetCount(b.get());
    629633    Value * blocksToCopy = b->CreateMul(itemsToCopy, count);
     
    637641    Value * const base = buffer->getBaseAddress(b.get());
    638642    Value * const overflow = buffer->getOverflowAddress(b.get());
    639     Value * const source = (direction == OverflowCopy::Forwards) ? base : overflow;
    640     Value * const target = (direction == OverflowCopy::Forwards) ? overflow : base;
     643    PointerType * const copyTy = b->getBitBlockType()->getPointerTo();
     644    Value * const source =
     645        b->CreatePointerCast((direction == OverflowCopy::Forwards) ? base : overflow, copyTy);
     646    Value * const target =
     647        b->CreatePointerCast((direction == OverflowCopy::Forwards) ? overflow : base, copyTy);
    641648
    642649    BasicBlock * const entryBlock = b->GetInsertBlock();
    643     b->CreateUnlikelyCondBr(cond, copyLoop, copyExit);
     650    b->CreateBr(copyLoop);
    644651
    645652    b->SetInsertPoint(copyLoop);
     
    688695    address = b->CreatePointerCast(address, buffer->getPointerType());
    689696    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    690         const auto prefix = mKernel->getName() + "." + binding.getName();
     697        const auto prefix = makeBufferName(mKernelIndex, binding);
    691698        ExternalBuffer tmp(b, binding.getType());
    692699        Value * const handle = b->CreateAlloca(tmp.getHandleType(b));
     
    696703        Value * const B0 = tmp.getStreamBlockPtr(b.get(), ZERO, blockIndex);
    697704        Value * const C0 = b->CreatePointerCast(B0, A0->getType());
    698         b->CreateAssert(b->CreateICmpEQ(A0, C0), prefix + ": logical base address is incorrect");
    699         Value * upToIndex = b->CreateAdd(blockIndex, b->CreateSub(mNumOfLinearStrides, b->getSize(1)));
    700         upToIndex = b->CreateSelect(b->CreateICmpEQ(mNumOfLinearStrides, ZERO), blockIndex, upToIndex);
    701         Value * const A1 = buffer->getStreamBlockPtr(b.get(), ZERO, upToIndex);
    702         Value * const B1 = tmp.getStreamBlockPtr(b.get(), ZERO, upToIndex);
    703         Value * const C1 = b->CreatePointerCast(B1, A1->getType());
    704         b->CreateAssert(b->CreateICmpEQ(A1, C1), prefix + ": logical base address is incorrect");
     705        b->CreateAssert(b->CreateICmpEQ(A0, C0), prefix + ": logical start address is incorrect");
     706//        Value * upToIndex = b->CreateAdd(blockIndex, b->CreateSub(mNumOfLinearStrides, b->getSize(1)));
     707//        upToIndex = b->CreateSelect(b->CreateICmpEQ(mNumOfLinearStrides, ZERO), blockIndex, upToIndex);
     708//        Value * const A1 = buffer->getStreamBlockPtr(b.get(), ZERO, upToIndex);
     709//        Value * const B1 = tmp.getStreamBlockPtr(b.get(), ZERO, upToIndex);
     710//        Value * const C1 = b->CreatePointerCast(B1, A1->getType());
     711//        b->CreateAssert(b->CreateICmpEQ(A1, C1), prefix + ": logical end address is incorrect");
    705712    }
    706713    return address;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

    r6254 r6255  
    263263    computeMinimumConsumedItemCounts(b);
    264264    computeMinimumPopCountReferenceCounts(b);
    265     computeFullyProducedItemCounts(b);
    266265    writeCopyForwardLogic(b);
    267266    writePopCountComputationLogic(b);
     267    computeFullyProducedItemCounts(b);
    268268    b->CreateBr(mKernelLoopExitPhiCatch);
    269269    b->SetInsertPoint(mKernelLoopExitPhiCatch);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/kernel_logic.hpp

    r6253 r6255  
    5353    // already been done. Work out the logic for these tests globally.
    5454
    55     Value * const accessible = getAccessibleInputItems(b, inputPort);
     55    Value * const accessible = getAccessibleInputItems(b, inputPort, true);
    5656    Value * const strideLength = getInputStrideLength(b, inputPort);
    5757    Value * const requiredInput = addLookahead(b, inputPort, strideLength);
     
    5959    const auto prefix = makeBufferName(mKernelIndex, input);
    6060    #ifdef PRINT_DEBUG_MESSAGES
     61    b->CallPrintInt(prefix + "_accessible", accessible);
    6162    b->CallPrintInt(prefix + "_requiredInput", requiredInput);
    6263    #endif
     
    8182 * @brief getAccessibleInputItems
    8283 ** ------------------------------------------------------------------------------------------------------------- */
    83 inline Value * PipelineCompiler::getAccessibleInputItems(BuilderRef b, const unsigned inputPort) {
     84Value * PipelineCompiler::getAccessibleInputItems(BuilderRef b, const unsigned inputPort, const bool addFacsimile) {
    8485    assert (inputPort < mAccessibleInputItems.size());
    8586    const Binding & input = mKernel->getInputStreamSetBinding(inputPort);
     
    8788    Value * const totalItems = getTotalItemCount(b, inputPort);
    8889    Value * const processed = mAlreadyProcessedPhi[inputPort];
    89     #ifdef PRINT_DEBUG_MESSAGES
    90     const auto prefix = makeBufferName(mKernelIndex, input);
    91     b->CallPrintInt(prefix + "_capacity", buffer->getCapacity(b.get()));
    92     b->CallPrintInt(prefix + "_totalItems", totalItems);
    93     b->CallPrintInt(prefix + "_processed", processed);
    94     #endif
    9590    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    9691        Value * const sanityCheck = b->CreateICmpULE(processed, totalItems);
     
    9994                        ": processed count exceeds total count");
    10095    }
    101     ConstantInt * const facsimile = b->getSize(getFacsimile(getInputBufferVertex(inputPort)));
    102     Value * const accessible = buffer->getLinearlyAccessibleItems(b, processed, totalItems, facsimile);
    103     #ifdef PRINT_DEBUG_MESSAGES
    104     b->CallPrintInt(prefix + "_accessible", accessible);
    105     #endif
    106     return accessible;
     96    ConstantInt * facsimile = nullptr;
     97    if (addFacsimile) {
     98        const auto size = getFacsimile(getInputBufferVertex(inputPort));
     99        if (size) {
     100            facsimile = b->getSize(size);
     101        }
     102    }
     103    return buffer->getLinearlyAccessibleItems(b, processed, totalItems, facsimile);
    107104}
    108105
     
    114111    if (LLVM_LIKELY(getOutputBufferType(outputPort) != BufferType::Managed)) {
    115112        const StreamSetBuffer * const buffer = getOutputBuffer(outputPort);
    116         Value * const writable = getWritableOutputItems(b, outputPort);
     113        Value * const writable = getWritableOutputItems(b, outputPort, true);
    117114        Value * const strideLength = getOutputStrideLength(b, outputPort);
    118115        const Binding & output = mKernel->getOutputStreamSetBinding(outputPort);
    119116        const auto prefix = makeBufferName(mKernelIndex, output);
    120117        #ifdef PRINT_DEBUG_MESSAGES
     118        b->CallPrintInt(prefix + "_writable", writable);
    121119        b->CallPrintInt(prefix + "_requiredOutput", strideLength);
    122120        #endif
     
    159157 * @brief getWritableOutputItems
    160158 ** ------------------------------------------------------------------------------------------------------------- */
    161 inline Value * PipelineCompiler::getWritableOutputItems(BuilderRef b, const unsigned outputPort) {
     159Value * PipelineCompiler::getWritableOutputItems(BuilderRef b, const unsigned outputPort, const bool addOverflow) {
    162160    assert (outputPort < mWritableOutputItems.size());
    163161    const Binding & output = mKernel->getOutputStreamSetBinding(outputPort);
     
    165163    Value * const produced = mAlreadyProducedPhi[outputPort]; assert (produced);
    166164    Value * const consumed = mConsumedItemCount[outputPort]; assert (consumed);
    167     #ifdef PRINT_DEBUG_MESSAGES
    168     const auto prefix = makeBufferName(mKernelIndex, output);
    169     b->CallPrintInt(prefix + "_capacity", buffer->getCapacity(b.get()));
    170     b->CallPrintInt(prefix + "_produced", produced);
    171     b->CallPrintInt(prefix + "_consumed", consumed);
    172     #endif
    173165    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    174166        Value * const sanityCheck = b->CreateICmpULE(consumed, produced);
     
    177169                        ": consumed count exceeds produced count");
    178170    }
    179     ConstantInt * const copyBack = b->getSize(getCopyBack(getOutputBufferVertex(outputPort)));
    180     Value * const writable = buffer->getLinearlyWritableItems(b, produced, consumed, copyBack);
    181     #ifdef PRINT_DEBUG_MESSAGES
    182     b->CallPrintInt(prefix + "_writable", writable);
    183     #endif
    184     return writable;
     171    ConstantInt * copyBack = nullptr;
     172    if (addOverflow) {
     173        const auto size = getCopyBack(getOutputBufferVertex(outputPort));
     174        if (size) {
     175            copyBack = b->getSize(size);
     176        }
     177    }
     178    return buffer->getLinearlyWritableItems(b, produced, consumed, copyBack);
    185179}
    186180
     
    211205    Value * const accessible = mAccessibleInputItems[inputPort];
    212206    if (LLVM_UNLIKELY(rate.isPopCount() || rate.isNegatedPopCount())) {
    213         numOfStrides = getMaximumNumOfPopCountStrides(b, input, accessible, getLookahead(b, inputPort));
     207        Value * const inBuffer = getAccessibleInputItems(b, inputPort, false);
     208        Constant * const lookAhead = getLookahead(b, inputPort);
     209        numOfStrides = getMaximumNumOfPopCountStrides(b, input, inBuffer, accessible, lookAhead);
    214210    } else {
    215211        Value * const strideLength = getInputStrideLength(b, inputPort);
     
    241237        Value * const writable = mWritableOutputItems[outputPort];
    242238        if (LLVM_UNLIKELY(rate.isPopCount() || rate.isNegatedPopCount())) {
    243             numOfStrides = getMaximumNumOfPopCountStrides(b, output, writable);
     239            Value * const inBuffer = getWritableOutputItems(b, outputPort, false);
     240            numOfStrides = getMaximumNumOfPopCountStrides(b, output, inBuffer, writable);
    244241        } else {
    245242            Value * const strideLength = getOutputStrideLength(b, outputPort);
     
    409406    const StreamSetBuffer * const buffer = cast<DynamicBuffer>(getOutputBuffer(outputPort));
    410407    buffer->setCapacity(b.get(), size);
    411     Value * const newWritableItems = getWritableOutputItems(b, outputPort);
     408    Value * const newWritableItems = getWritableOutputItems(b, outputPort, true);
    412409    BasicBlock * const expandEnd = b->GetInsertBlock();
    413410    b->CreateBr(target);
     
    803800    // TODO: if we determine all of the inputs of a stream have a blocksize attribute, or the output has one,
    804801    // we can skip masking it on input
     802
     803
     804
    805805    if (LLVM_UNLIKELY(binding.hasAttribute(AttrId::BlockSize))) {
    806806        // If the input rate has a block size attribute then --- for the purpose of determining how many
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_compiler.hpp

    r6253 r6255  
    1818#include <queue>
    1919
    20 // #define PRINT_DEBUG_MESSAGES
     20//#define PRINT_DEBUG_MESSAGES
    2121
    2222using namespace boost;
     
    261261    static Value * getMaximumStrideLength(BuilderRef b, const Kernel * kernel, const Binding & binding);
    262262    Value * calculateNumOfLinearItems(BuilderRef b, const Binding & binding);
    263     Value * getAccessibleInputItems(BuilderRef b, const unsigned inputPort);
     263    Value * getAccessibleInputItems(BuilderRef b, const unsigned inputPort, const bool addFacsimile);
    264264    Value * getNumOfAccessibleStrides(BuilderRef b, const unsigned inputPort);
    265265    Value * getNumOfWritableStrides(BuilderRef b, const unsigned outputPort);
    266     Value * getWritableOutputItems(BuilderRef b, const unsigned outputPort);
     266    Value * getWritableOutputItems(BuilderRef b, const unsigned outputPort, const bool addOverflow);
    267267    Value * calculateBufferExpansionSize(BuilderRef b, const unsigned outputPort);
    268268    Value * addLookahead(BuilderRef b, const unsigned inputPort, Value * itemCount) const;
     
    291291
    292292    Value * getMinimumNumOfLinearPopCountItems(BuilderRef b, const Binding & binding);
    293     Value * getMaximumNumOfPopCountStrides(BuilderRef b, const Binding & binding, not_null<Value *> sourceItemCount, Constant * const lookAhead = nullptr);
     293    Value * getMaximumNumOfPopCountStrides(BuilderRef b, const Binding & binding, not_null<Value *> sourceItemCount, not_null<Value *> peekableItemCount, Constant * const lookAhead = nullptr);
    294294    Value * getNumOfLinearPopCountItems(BuilderRef b, const Binding & binding);
    295295
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/popcount_logic.hpp

    r6253 r6255  
    260260        invertCount = rate.isNegatedPopCount() ^ pc.AlwaysNegated;
    261261    }
    262 
    263262    if (invertCount) {
    264263        Constant * const BlockWidth = b->getSize(b->getBitBlockWidth());
    265264        minCount = b->CreateSub(BlockWidth, minCount);
    266265    }
    267     #ifdef PRINT_DEBUG_MESSAGES
    268     const auto prefix = makeBufferName(mKernelIndex, binding);
    269     b->CallPrintInt(prefix + "_minCount", minCount);
    270     #endif
    271266    return minCount;
    272267}
     
    275270 * @brief getMaximumNumOfPopCountStrides
    276271 ** ------------------------------------------------------------------------------------------------------------- */
    277 Value * PipelineCompiler::getMaximumNumOfPopCountStrides(BuilderRef b, const Binding & binding, not_null<Value *> sourceItemCount, Constant * const lookAhead) {
     272Value * PipelineCompiler::getMaximumNumOfPopCountStrides(BuilderRef b, const Binding & binding,
     273                                                         not_null<Value *> sourceItemCount,
     274                                                         not_null<Value *> peekableItemCount,
     275                                                         Constant * const lookAhead) {
    278276
    279277    const ProcessingRate & rate = binding.getRate();
     
    298296        b->CreateBasicBlock(prefix + "Exit", mKernelLoopCall);
    299297
    300     // If we have a base offset, then it's possible that our partial sum started "before"
    301     // the processed position of this kernel. Add the "skipped items" to the sourceItemCount.
     298    Constant * const ONE = b->getSize(1);
     299    Constant * const MAX_INT = ConstantInt::getAllOnesValue(sizeTy);
     300    // It's possible that our partial sum started "before" the processed position of this kernel...
    302301    Value * const offset = getPopCountInitialOffset(b, binding, bufferVertex, pc); assert (offset);
     302    Value * const initialIndex = b->CreateAdd(mNumOfLinearStrides, offset);
     303    // Add the "skipped items" to the source and peekable item counts
    303304    Value * const skippedItems = b->CreateLoad(b->CreateGEP(array, offset));
    304 
    305     #ifdef PRINT_DEBUG_MESSAGES
    306     b->CallPrintInt(prefix + "_sourceItemCount", sourceItemCount);
    307     b->CallPrintInt(prefix + "_skippedItems", skippedItems);
    308     #endif
    309 
    310     Value * available = b->CreateAdd(sourceItemCount, skippedItems);
    311 
    312     assert (mNumOfLinearStrides);
    313     Value * const strides = b->CreateAdd(mNumOfLinearStrides, offset);
    314 
    315     #ifdef PRINT_DEBUG_MESSAGES
    316     b->CallPrintInt(prefix + "_linearStrides", mNumOfLinearStrides);
    317     b->CallPrintInt(prefix + "_offset", offset);
    318     #endif
     305    Value * const availableItems = b->CreateAdd(sourceItemCount, skippedItems);
     306    Value * const peekableItems = b->CreateAdd(peekableItemCount, skippedItems);
     307    b->CreateBr(popCountLoop);
    319308
    320309    // TODO: replace this with a parallel icmp check and bitscan?
    321     b->CreateBr(popCountLoop);
    322 
    323310    b->SetInsertPoint(popCountLoop);
    324311    PHINode * const index = b->CreatePHI(sizeTy, 2);
    325     index->addIncoming(strides, popCountEntry);
     312    index->addIncoming(initialIndex, popCountEntry);
     313    PHINode * const nextRequiredItems = b->CreatePHI(sizeTy, 2);
     314    nextRequiredItems->addIncoming(MAX_INT, popCountEntry);
     315
    326316    Value * requiredItems = b->CreateLoad(b->CreateGEP(array, index));
    327 
    328317    if (LLVM_UNLIKELY(pc.AlwaysNegated ^ rate.isNegatedPopCount())) {
    329318        Constant * const Log2BlockWidth = b->getSize(std::log2(b->getBitBlockWidth()));
     
    336325    }
    337326
    338     Value * const hasEnough = b->CreateICmpULE(requiredItems, available);
     327    Value * const hasEnough = b->CreateICmpULE(requiredItems, availableItems);
    339328    BasicBlock * const popCountLoopEnd = b->GetInsertBlock();
    340     Constant * const ONE = b->getSize(1);
    341     Value * const nextIndex = b->CreateSub(index, ONE);
    342     index->addIncoming(nextIndex, popCountLoopEnd);
     329    Value * const priorIndex = b->CreateSub(index, ONE);
     330    index->addIncoming(priorIndex, popCountLoopEnd);
     331    nextRequiredItems->addIncoming(requiredItems, popCountLoopEnd);
    343332    b->CreateCondBr(hasEnough, popCountExit, popCountLoop);
    344333
    345334    b->SetInsertPoint(popCountExit);
    346     Value * const maxStrides = b->CreateSub(index, offset);
    347     #ifdef PRINT_DEBUG_MESSAGES
    348     b->CallPrintInt(prefix + "_maxStrides", maxStrides);
    349     #endif
    350     return maxStrides;
     335    // Since we want to allow the stream to peek into the overflow but not start
     336    // in it, check to see if we can support one more stride by using it.
     337    Value * const numOfStrides = b->CreateSub(index, offset);
     338    Value * const endedPriorToBufferEnd = b->CreateICmpNE(requiredItems, availableItems);
     339    Value * const canPeekIntoOverflow = b->CreateICmpULE(nextRequiredItems, peekableItems);
     340    Value * const useOverflow = b->CreateAnd(endedPriorToBufferEnd, canPeekIntoOverflow);
     341    return b->CreateSelect(useOverflow, b->CreateAdd(numOfStrides, ONE), numOfStrides);
    351342}
    352343
     
    441432        indices[2] = b->getInt32(BASE_OFFSET_INDEX);
    442433        Value * const baseOffset = b->CreateLoad(b->CreateGEP(mPopCountState, indices));
    443         #ifdef PRINT_DEBUG_MESSAGES
    444         const auto prefix = makeBufferName(mKernelIndex, binding) + "_popCount";
    445         b->CallPrintInt(prefix + "_baseOffset", baseOffset);
    446         #endif
    447434        Value * const strideOffset = getReferenceStreamOffset(b, binding);
    448         #ifdef PRINT_DEBUG_MESSAGES
    449         b->CallPrintInt(prefix + "_strideOffset", strideOffset);
    450         #endif
    451435        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    452436            Value * const sanityCheck = b->CreateICmpUGE(strideOffset, baseOffset);
     
    456440        }
    457441        pc.InitialOffset = b->CreateSub(strideOffset, baseOffset);
    458         #ifdef PRINT_DEBUG_MESSAGES
    459         b->CallPrintInt(prefix + "_initialOffset", pc.InitialOffset);
    460         #endif
    461442    }
    462443    return pc.InitialOffset;
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r6252 r6255  
    7070}
    7171
    72 inline Value * StreamSetBuffer::addOverflow(const std::unique_ptr<kernel::KernelBuilder> & b, Value * capacity, Value * const overflowItems) const {
     72Value * StreamSetBuffer::addOverflow(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const bufferCapacity, Value * const overflowItems, Value * const consumedOffset) const {
    7373    if (overflowItems) {
    7474        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     
    7777            b->CreateAssert(valid, "overflow items exceeds overflow capacity");
    7878        }
    79         capacity = b->CreateAdd(capacity, overflowItems);
    80     }
    81     return capacity;
     79        // limit the overflow so that we do not overwrite our unconsumed data during a copyback
     80        Value * const effectiveOverflow = b->CreateUMin(consumedOffset, overflowItems);
     81        return b->CreateAdd(bufferCapacity, effectiveOverflow);
     82    } else { // no overflow
     83        return bufferCapacity;
     84    }
    8285}
    8386
     
    181184
    182185Value * ExternalBuffer::getStreamBlockPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex) const {
    183     assertValidBlockIndex(b, blockIndex);
     186//    assertValidBlockIndex(b, blockIndex);
    184187    return StreamSetBuffer::getStreamBlockPtr(b, streamIndex, blockIndex);
    185188}
    186189
    187190Value * ExternalBuffer::getStreamPackPtr(IDISA_Builder * const b, Value * const streamIndex, Value * const blockIndex, Value * const packIndex) const {
    188     assertValidBlockIndex(b, blockIndex);
     191//    assertValidBlockIndex(b, blockIndex);
    189192    return StreamSetBuffer::getStreamPackPtr(b, streamIndex, blockIndex, packIndex);
    190193}
     
    301304    Value * const consumedOffset = b->CreateURem(consumedItems, capacity);
    302305    Value * const toEnd = b->CreateICmpULE(consumedOffset, fromOffset);
    303     // limit the overflow so that we do not overwrite our unconsumed data during a copyback
    304     Value * const effectiveOverflow = b->CreateUMin(consumedOffset, overflowItems);
    305     Value * const capacityWithOverflow = addOverflow(b, capacity, effectiveOverflow);
     306    Value * const capacityWithOverflow = addOverflow(b, capacity, overflowItems, consumedOffset);
    306307    Value * const limit = b->CreateSelect(toEnd, capacityWithOverflow, consumedOffset);
    307308    Value * const remaining = b->CreateSub(limit, fromOffset);
     
    423424    Value * const consumedOffset = b->CreateURem(consumedItems, capacity);
    424425    Value * const toEnd = b->CreateICmpULE(consumedOffset, fromOffset);
    425     // limit the overflow so that we do not overwrite our unconsumed data during a copyback
    426     Value * const effectiveOverflow = b->CreateUMin(consumedOffset, overflowItems);
    427     Value * const capacityWithOverflow = addOverflow(b, capacity, effectiveOverflow);
     426    Value * const capacityWithOverflow = addOverflow(b, capacity, overflowItems, consumedOffset);
    428427    Value * const limit = b->CreateSelect(toEnd, capacityWithOverflow, consumedOffset);
    429428    Value * const remaining = b->CreateSub(limit, fromOffset);
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r6252 r6255  
    107107    llvm::Value * getHandle(IDISA::IDISA_Builder * const b) const;
    108108
    109     llvm::Value * addOverflow(const std::unique_ptr<KernelBuilder> &b, llvm::Value * capacity, llvm::Value * const overflowItems) const;
     109    llvm::Value * addOverflow(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const bufferCapacity, llvm::Value * const overflowItems, llvm::Value * const consumedOffset = nullptr) const;
    110110
    111111    StreamSetBuffer(const BufferKind k, const std::unique_ptr<KernelBuilder> & b, llvm::Type * baseType, unsigned AddressSpace);
Note: See TracChangeset for help on using the changeset viewer.