Ignore:
Timestamp:
Dec 20, 2017, 11:42:53 AM (18 months ago)
Author:
nmedfort
Message:

Bug fix for pipeline: it was terminating too early when there was insufficient output space to process all of the input for a kernel.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5783 r5793  
    269269    }
    270270    addScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
    271     addScalar(idb->getInt1Ty(), TERMINATION_SIGNAL);
     271    addScalar(sizeTy, TERMINATION_SIGNAL);
    272272    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    273273        addScalar(sizeTy, mStreamSetOutputs[i].getName() + CONSUMED_ITEM_COUNT_SUFFIX);
     
    628628inline unsigned MultiBlockKernel::getItemAlignment(const Binding & binding) const {
    629629    const auto & rate = binding.getRate();
    630     if (rate.isFixed() && binding.nonDeferred()) {
     630    if (rate.isFixed() && binding.nonDeferred() && !binding.isMisaligned()) {
    631631        const auto r = rate.getRate();
    632632        auto n = (r.numerator() * mStride);
     
    664664    }
    665665
    666     using AttributeId = kernel::Attribute::KindId;
    667666    using RateValue = ProcessingRate::RateValue;
    668667
     
    725724    Constant * const BLOCK_WIDTH_MASK = b->getSize(b->getBitBlockWidth() - 1);
    726725
     726    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     727        Value * terminatedTwice = b->CreateAnd(mIsFinal, b->getTerminationSignal());
     728        Value * unprocessedData = nullptr;
     729        for (unsigned i = 0; i < inputSetCount; i++) {
     730            Value * processed = b->getProcessedItemCount(mStreamSetInputs[i].getName());
     731            Value * const check = b->CreateICmpNE(processed, mAvailableItemCount[i]);
     732            unprocessedData = unprocessedData ? b->CreateOr(unprocessedData, check) : check;
     733        }
     734        b->CreateAssertZero(b->CreateAnd(terminatedTwice, unprocessedData),
     735                            getName() + " was called after its termination with additional input data");
     736        b->CreateAssertZero(terminatedTwice,
     737                            getName() + " was called after its termination");
     738    }
     739
    727740    // Now proceed with creation of the doSegment method.
    728741    BasicBlock * const segmentLoop = b->CreateBasicBlock("SegmentLoop");
     
    745758        const auto & name = input.getName();
    746759        const ProcessingRate & rate = input.getRate();
    747         Value * processed = b->getProcessedItemCount(name);
    748         //b->CallPrintInt(getName() + "_" + name + "_processed", processed);
     760        Value * const processed = b->getProcessedItemCount(name);
    749761
    750762        mInitialProcessedItemCount[i] = processed;
    751763        Value * baseBuffer  = b->getBlockAddress(name, b->CreateLShr(processed, LOG_2_BLOCK_WIDTH));
    752764
    753         if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    754             b->CreateAssert(b->CreateICmpULT(processed, mAvailableItemCount[i]), "processed item count must be less than the available item count");
    755         }
     765        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {           
     766            b->CreateAssert(b->CreateICmpULE(processed, mAvailableItemCount[i]),
     767                            getName() + ": " + name + " processed item count exceeds its available item count");
     768        }
     769
     770        // Ensure that everything between S⌈P/S⌉, and S⌈n*(P + L)/S⌉ is linearly available, where S is the stride size,
     771        // P is the current processed position, L is the lookahead amount and n ∈ ℤ+.
    756772
    757773        Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], processed);
    758         //b->CallPrintInt(getName() + "_" + name + "_unprocessed", unprocessed);
    759 
    760774        Value * avail = b->getLinearlyAccessibleItems(name, processed, unprocessed);
    761         //b->CallPrintInt(getName() + "_" + name + "_avail", avail);
    762 
    763 
    764         // Ensure that everything between S⌈P/S⌉, and S⌈n*(P + L)/S⌉ is linearly available, where S is
    765         // the stride size, P is the current processed position, L is the lookahead amount and n ∈ ℤ+.
    766 
    767775        Value * remaining = avail;
    768776        if (LLVM_UNLIKELY(input.hasLookahead())) {
    769777            Constant * const lookahead = b->getSize(input.getLookahead());
    770778            remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO);
    771             //b->CallPrintInt(getName() + "_" + name + "_remaining", remaining);
    772779        }
    773780
    774781        inputStrideSize[i] = getStrideSize(b, rate);
    775 
    776782        Value * accessibleStrides = b->CreateUDiv(remaining, inputStrideSize[i]);
    777 
    778         //b->CallPrintInt(getName() + "_" + name + "_accessibleStrides", accessibleStrides);
    779 
    780783        AllocaInst * const tempBuffer = temporaryInputBuffer[i];
    781784        if (tempBuffer) {
     
    795798            Value * const temporarySize = b->CreateTrunc(b->CreateMul(arraySize, b->getInt64(mStride)), unprocessed->getType());
    796799            Value * const temporaryAvailable = b->CreateUMin(unprocessed, temporarySize);
    797             if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    798                 b->CreateAssert(b->CreateICmpULE(avail, temporaryAvailable),
    799                                 "linearly available item count cannot exceed the temporarily available item count");
    800             }
    801800            Value * const offset = b->CreateAnd(processed, BLOCK_WIDTH_MASK);
    802801            Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), arraySize);
     
    816815
    817816            b->SetInsertPoint(resume);
    818             PHINode * const bufferPtr = b->CreatePHI(baseBuffer->getType(), 3);
    819             bufferPtr->addIncoming(baseBuffer , entry);
     817            PHINode * const bufferPtr = b->CreatePHI(baseBuffer->getType(), 4);
     818            bufferPtr->addIncoming(baseBuffer, entry);
    820819            bufferPtr->addIncoming(tempBuffer, copyToBackEnd);
    821820            bufferPtr->addIncoming(tempBuffer, copyToFrontEnd);
    822821            baseBuffer = bufferPtr;
    823822
    824             PHINode * const phiAvailItemCount = b->CreatePHI(b->getSizeTy(), 3);
     823            PHINode * const phiAvailItemCount = b->CreatePHI(b->getSizeTy(), 4);
    825824            phiAvailItemCount->addIncoming(avail, entry);
    826825            phiAvailItemCount->addIncoming(temporaryAvailable, copyToBackEnd);
     
    828827            avail = phiAvailItemCount;
    829828
    830             PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
     829            PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 4);
    831830            phiStrides->addIncoming(accessibleStrides, entry);
    832831            phiStrides->addIncoming(temporaryStrides, copyToBackEnd);
     
    849848        const ProcessingRate & rate = output.getRate();
    850849        Value * const produced = b->getProducedItemCount(name);
    851 
    852         //b->CallPrintInt(getName() + "_" + name + "_produced", produced);
    853 
    854850        Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(produced, LOG_2_BLOCK_WIDTH));
    855851        assert (baseBuffer->getType()->isPointerTy());
    856852        linearlyWritable[i] = b->getLinearlyWritableItems(name, produced);
    857 
    858         //b->CallPrintInt(getName() + "_" + name + "_linearlyWritable", linearlyWritable[i]);
    859 
    860853        outputStrideSize[i] = getStrideSize(b, rate);
    861854        // Is the number of linearly writable items sufficient for a stride?
     
    863856            AllocaInst * const tempBuffer = temporaryOutputBuffer[i];
    864857            Value * writableStrides = b->CreateUDiv(linearlyWritable[i], outputStrideSize[i]);
    865             //b->CallPrintInt(getName() + "_" + name + "_writableStrides", writableStrides);
    866 
    867 
    868858            // Do we require a temporary buffer to write to?
    869859            if (tempBuffer) {
    870860                assert (tempBuffer->getType() == baseBuffer->getType());
    871861                BasicBlock * const entry = b->GetInsertBlock();
    872                 BasicBlock * const clearBuffer = b->CreateBasicBlock(name + "ClearTemporaryBuffer");
     862                BasicBlock * const prepareTempBuffer = b->CreateBasicBlock(name + "PrepareTempBuffer");
    873863                BasicBlock * const resume = b->CreateBasicBlock(name + "Resume");
    874864                Value * const requiresCopy = b->CreateICmpEQ(writableStrides, ZERO);
    875                 b->CreateUnlikelyCondBr(requiresCopy, clearBuffer, resume);
     865                b->CreateUnlikelyCondBr(requiresCopy, prepareTempBuffer, resume);
    876866                // Clear the output buffer prior to using it
    877                 b->SetInsertPoint(clearBuffer);
     867                b->SetInsertPoint(prepareTempBuffer);
    878868                Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize());
    879869                b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);
     
    883873                PHINode * const phiBuffer = b->CreatePHI(baseBuffer->getType(), 3);
    884874                phiBuffer->addIncoming(baseBuffer, entry);
    885                 phiBuffer->addIncoming(tempBuffer, clearBuffer);
     875                phiBuffer->addIncoming(tempBuffer, prepareTempBuffer);
    886876                baseBuffer = phiBuffer;
    887877                PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
    888878                phiStrides->addIncoming(writableStrides, entry);
    889                 phiStrides->addIncoming(ONE, clearBuffer);
     879                phiStrides->addIncoming(ONE, prepareTempBuffer);
    890880                writableStrides = phiStrides;
    891881            }
     
    964954            continue;
    965955        }
     956
    966957        Value * const baseBuffer = mStreamSetOutputBaseAddress[i];
    967958        assert ("stack corruption likely" && (tempBuffer->getType() == baseBuffer->getType()));
     
    994985    //  We've dealt with the partial block processing and copied information back into the
    995986    //  actual buffers.  If this isn't the final block, loop back for more multiblock processing.
    996     if (hasNoTerminateAttribute()) {
    997         b->CreateCondBr(mIsFinal, segmentDone, strideDone);
    998     } else {
    999         BasicBlock * const setTermination = b->CreateBasicBlock("setTermination");
    1000         b->CreateCondBr(mIsFinal, setTermination, strideDone);
    1001 
    1002         b->SetInsertPoint(setTermination);
    1003         b->setTerminationSignal();
    1004         b->CreateBr(segmentDone);       
    1005     }
     987    BasicBlock * const setTermination = b->CreateBasicBlock("setTermination");
     988    b->CreateCondBr(mIsFinal, setTermination, strideDone);
     989
     990    b->SetInsertPoint(setTermination);
     991    b->setTerminationSignal();
     992    b->CreateBr(segmentDone);
    1006993
    1007994    /// STRIDE DONE
    1008995    strideDone->moveAfter(b->GetInsertBlock());
    1009996    b->SetInsertPoint(strideDone);
    1010 
    1011     b->CreateAssertZero(mIsFinal, "stride done cannot process the final block");
    1012997
    1013998    // do we have enough data for another stride?
     
    10191004        Value * const processed = b->getProcessedItemCount(name);
    10201005        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1021             b->CreateAssert(b->CreateICmpULE(processed, avail), getName() + "." + name + ": processed data exceeds available data");
     1006            b->CreateAssert(b->CreateICmpULE(processed, avail), getName() + ": " + name + " processed data exceeds available data");
    10221007        }
    10231008        Value * remaining = b->CreateSub(avail, processed);
    1024         if (LLVM_UNLIKELY(input.hasAttribute(AttributeId::LookAhead))) {
    1025             Constant * const lookahead = b->getSize(input.findAttribute(AttributeId::LookAhead).amount());
     1009        if (LLVM_UNLIKELY(input.hasLookahead())) {
     1010            Constant * const lookahead = b->getSize(input.getLookahead());
    10261011            remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO);
    10271012        }
     
    10421027            Value * const consumed = b->getConsumedItemCount(name);
    10431028            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1044                 b->CreateAssert(b->CreateICmpULE(consumed, produced), getName() + "." + name + ": consumed data exceeds produced data");
     1029                b->CreateAssert(b->CreateICmpULE(consumed, produced),
     1030                                getName() + ": " + name + " consumed data exceeds produced data");
    10451031            }
    10461032            Value * const unconsumed = b->CreateSub(produced, consumed);
    10471033            Value * const capacity = b->getCapacity(name);
    10481034            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1049                 b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), getName() + "." + name + ": unconsumed data exceeds capacity");
     1035                b->CreateAssert(b->CreateICmpULE(unconsumed, capacity),
     1036                                getName() + ": " + name + " unconsumed data exceeds capacity");
    10501037            }
    10511038            Value * const remaining = b->CreateSub(capacity, unconsumed);
    10521039            Value * const remainingStrides = b->CreateUDiv(remaining, outputStrideSize[i]);
    10531040            Value * const hasRemainingStrides = b->CreateICmpNE(remainingStrides, ZERO);
     1041
    10541042            hasMoreStrides = b->CreateAnd(hasMoreStrides, hasRemainingStrides);
    10551043        }
    10561044        // Do copybacks if necessary.
    10571045        if (mStreamSetOutputBuffers[i]->supportsCopyBack() && requiresCopyBack(rate)) {
    1058             b->CreateCopyBack(name, mInitialProducedItemCount[i], produced);
    1059         }
    1060     }
    1061 
    1062     // b->CreateAssertZero(b->CreateOr(b->CreateNot(initiallyFinal), hasMoreStrides), getName() + " does not have enough output space for the final stride");
     1046            BasicBlock * const copyBack = b->CreateBasicBlock(name + "CopyBack");
     1047            BasicBlock * const done = b->CreateBasicBlock(name + "CopyBackDone");
     1048
     1049            Value * const bufferSize = b->getBufferedSize(name);
     1050            Value * const prior = b->CreateURem(mInitialProducedItemCount[i], bufferSize);
     1051            Value * const current = b->CreateURem(produced, bufferSize);
     1052            b->CreateUnlikelyCondBr(b->CreateICmpUGT(prior, current), copyBack, done);
     1053
     1054            b->SetInsertPoint(copyBack);
     1055            Value * const baseAddress = b->getBaseAddress(name);
     1056            const auto copyAlignment = getItemAlignment(mStreamSetOutputs[i]);
     1057            b->CreateStreamCpy(name, baseAddress, ZERO, baseAddress, bufferSize, current, copyAlignment);
     1058            b->CreateBr(done);
     1059
     1060            b->SetInsertPoint(done);
     1061        }
     1062    }
    10631063
    10641064    b->CreateCondBr(hasMoreStrides, segmentLoop, segmentDone);
     
    13441344    }
    13451345
    1346     writeFinalBlockMethod(b, getRemainingItems(b));
     1346    Value * const remainingItems = getRemainingItems(b);
     1347
     1348//    b->CallPrintInt(getName() + "_remainingItems", remainingItems);
     1349
     1350    writeFinalBlockMethod(b, remainingItems);
    13471351
    13481352    b->CreateBr(segmentDone);
     
    15511555, mCurrentMethod(nullptr)
    15521556, mAvailablePrincipalItemCount(nullptr)
    1553 , mNoTerminateAttribute(false)
    15541557, mIsGenerated(false)
    15551558, mStride(0)
Note: See TracChangeset for help on using the changeset viewer.