Ignore:
Timestamp:
Dec 15, 2017, 12:44:01 PM (14 months ago)
Author:
nmedfort
Message:

Initial check-in of LookAhead support; modified LineBreakKernel to compute CR+LF using LookAhead(1) + misc. fixes.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5771 r5782  
    624624
    625625/** ------------------------------------------------------------------------------------------------------------- *
    626  * @brief roundUp
    627  ** ------------------------------------------------------------------------------------------------------------- */
    628 unsigned roundUp(const ProcessingRate::RateValue & r) {
    629     if (LLVM_LIKELY(r.denominator() == 1)) {
    630         return r.numerator();
    631     } else {
    632         return (r.numerator() + r.denominator() - 1) / r.denominator();
    633     }
    634 }
    635 
    636 /** ------------------------------------------------------------------------------------------------------------- *
    637626 * @brief getItemAlignment
    638627 ** ------------------------------------------------------------------------------------------------------------- */
    639628inline unsigned MultiBlockKernel::getItemAlignment(const Binding & binding) const {
    640629    const auto & rate = binding.getRate();
    641     if (rate.isFixed()) {
    642         const auto & r = rate.getRate();
    643         const auto n = (r.numerator() * mStride);
     630    if (rate.isFixed() && binding.nonDeferred()) {
     631        const auto r = rate.getRate();
     632        auto n = (r.numerator() * mStride);
    644633        if (LLVM_LIKELY(r.denominator() == 1)) {
    645634            return n;
     
    675664    }
    676665
     666    using AttributeId = kernel::Attribute::KindId;
     667    using RateValue = ProcessingRate::RateValue;
     668
    677669    const auto inputSetCount = mStreamSetInputs.size();
    678670    const auto outputSetCount = mStreamSetOutputs.size();
     
    682674    AllocaInst * temporaryInputBuffer[inputSetCount];
    683675    for (unsigned i = 0; i < inputSetCount; ++i) {
    684         const auto & input = mStreamSetInputs[i];
     676        const Binding & input = mStreamSetInputs[i];
    685677        const ProcessingRate & rate = input.getRate();
    686678        if (isTransitivelyUnknownRate(rate)) {
    687679            report_fatal_error("MultiBlock kernels do not support unknown rate input streams or streams relative to an unknown rate input.");
    688         } else if (rate.isFixed() && input.nonDeferred() && !requiresBufferedFinalStride(input)) {
     680        } else if (rate.isFixed() && !requiresBufferedFinalStride(input)) {
    689681            temporaryInputBuffer[i] = nullptr;
    690682        } else {
    691683            Type * const ty = mStreamSetInputBuffers[i]->getStreamSetBlockType();
    692             const auto ub = getUpperBound(rate);
    693             Constant * arraySize = b->getInt64(roundUp(ub));
     684            auto ub = getUpperBound(rate);
     685            if (LLVM_UNLIKELY(input.hasLookahead())) {
     686                ub += RateValue(input.getLookahead(), mStride);
     687            }
     688            Constant * const arraySize = b->getInt64(ceiling(ub));
    694689            AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize);
    695690            assert (ptr->isStaticAlloca());
     
    700695    AllocaInst * temporaryOutputBuffer[outputSetCount];
    701696    for (unsigned i = 0; i < outputSetCount; i++) {
    702         const auto & output = mStreamSetOutputs[i];
     697        const Binding & output = mStreamSetOutputs[i];
    703698        const ProcessingRate & rate = output.getRate();
    704         if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate) || (rate.isFixed() && output.nonDeferred() && !requiresBufferedFinalStride(output)))) {
     699        if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate) || (rate.isFixed() && !requiresBufferedFinalStride(output)))) {
    705700            temporaryOutputBuffer[i] = nullptr;
    706701        } else {           
     
    710705            }
    711706            Type * const ty = mStreamSetOutputBuffers[i]->getStreamSetBlockType();
    712             Constant * arraySize = b->getInt64(roundUp(ub));
     707            Constant * const arraySize = b->getInt64(ceiling(ub));
    713708            AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize);
    714709            assert (ptr->isStaticAlloca());
     
    742737    // linearly available strides.
    743738    Value * numOfStrides = nullptr;
    744     mInitialAvailableItemCount.resize(inputSetCount);
     739    mInitialAvailableItemCount.assign(mAvailableItemCount.begin(), mAvailableItemCount.end());
    745740    mInitialProcessedItemCount.resize(inputSetCount);
    746741    mStreamSetInputBaseAddress.resize(inputSetCount);
    747742    Value * inputStrideSize[inputSetCount];
    748743    for (unsigned i = 0; i < inputSetCount; i++) {
    749         const auto & input = mStreamSetInputs[i];
     744        const Binding & input = mStreamSetInputs[i];
    750745        const auto & name = input.getName();
    751746        const ProcessingRate & rate = input.getRate();
    752         Value * const ic = b->getProcessedItemCount(name);
    753         mInitialProcessedItemCount[i] = ic;
     747        Value * processed = b->getProcessedItemCount(name);
     748        //b->CallPrintInt(getName() + "_" + name + "_processed", processed);
     749
     750        mInitialProcessedItemCount[i] = processed;
     751        Value * baseBuffer  = b->getBlockAddress(name, b->CreateLShr(processed, LOG_2_BLOCK_WIDTH));
     752
    754753        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    755             b->CreateAssert(b->CreateICmpUGE(mAvailableItemCount[i], ic),
    756                             "processed item count cannot exceed the available item count");
    757         }
    758         assert (ic->getType() == mAvailableItemCount[i]->getType());
    759         Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], ic);
    760         Value * baseBuffer  = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH));
    761         mInitialAvailableItemCount[i] = mAvailableItemCount[i];
    762         mAvailableItemCount[i] = b->getLinearlyAccessibleItems(name, ic, unprocessed);
    763 
    764         // Are our linearly accessible items sufficient for a stride?
     754            b->CreateAssert(b->CreateICmpULT(processed, mAvailableItemCount[i]), "processed item count must be less than the available item count");
     755        }
     756
     757        Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], processed);
     758        //b->CallPrintInt(getName() + "_" + name + "_unprocessed", unprocessed);
     759
     760        Value * avail = b->getLinearlyAccessibleItems(name, processed, unprocessed);
     761        //b->CallPrintInt(getName() + "_" + name + "_avail", avail);
     762
     763
     764        // Ensure that everything between S⌈P/S⌉, and S⌈n*(P + L)/S⌉ is linearly available, where S is
     765        // the stride size, P is the current processed position, L is the lookahead amount and n ∈ ℤ+.
     766
     767        Value * remaining = avail;
     768        if (LLVM_UNLIKELY(input.hasLookahead())) {
     769            Constant * const lookahead = b->getSize(input.getLookahead());
     770            remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO);
     771            //b->CallPrintInt(getName() + "_" + name + "_remaining", remaining);
     772        }
     773
    765774        inputStrideSize[i] = getStrideSize(b, rate);
    766         Value * accessibleStrides = b->CreateUDiv(mAvailableItemCount[i], inputStrideSize[i]);
     775
     776        Value * accessibleStrides = b->CreateUDiv(remaining, inputStrideSize[i]);
     777
     778        //b->CallPrintInt(getName() + "_" + name + "_accessibleStrides", accessibleStrides);
     779
    767780        AllocaInst * const tempBuffer = temporaryInputBuffer[i];
    768781        if (tempBuffer) {
     
    779792
    780793            b->SetInsertPoint(copyFromBack);
    781             Value * const temporaryAvailable = b->CreateUMin(unprocessed, inputStrideSize[i]);
     794            Value * const temporarySize = b->CreateMul(tempBuffer->getArraySize(), b->getSize(mStride));
     795            Value * const temporaryAvailable = b->CreateUMin(unprocessed, temporarySize);
    782796            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    783                 b->CreateAssert(b->CreateICmpULE(mAvailableItemCount[i], temporaryAvailable),
    784                                 "linearly available cannot be greater than temporarily available");
    785             }
    786             Value * const offset = b->CreateAnd(ic, BLOCK_WIDTH_MASK);
     797                b->CreateAssert(b->CreateICmpULE(avail, temporaryAvailable),
     798                                "linearly available item count cannot exceed the temporarily available item count");
     799            }
     800            Value * const offset = b->CreateAnd(processed, BLOCK_WIDTH_MASK);
    787801            Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize());
    788802            b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);
    789803            const auto copyAlignment = getItemAlignment(mStreamSetInputs[i]);
    790             b->CreateStreamCpy(name, tempBuffer, ZERO, baseBuffer, offset, mAvailableItemCount[i], copyAlignment);
     804            b->CreateStreamCpy(name, tempBuffer, ZERO, baseBuffer, offset, avail, copyAlignment);
    791805            Value * const temporaryStrides = b->CreateSelect(b->CreateICmpULT(unprocessed, inputStrideSize[i]), ZERO, ONE);
    792806            BasicBlock * const copyToBackEnd = b->GetInsertBlock();
    793             b->CreateCondBr(b->CreateICmpNE(mAvailableItemCount[i], temporaryAvailable), copyFromFront, resume);
     807            b->CreateCondBr(b->CreateICmpNE(temporaryAvailable, unprocessed), copyFromFront, resume);
    794808
    795809            b->SetInsertPoint(copyFromFront);
    796             Value * const remaining = b->CreateSub(temporaryAvailable, mAvailableItemCount[i]);
     810            Value * const remaining = b->CreateSub(temporaryAvailable, avail);
    797811            Value * const baseAddress = b->getBaseAddress(name);
    798             b->CreateStreamCpy(name, tempBuffer, mAvailableItemCount[i], baseAddress, ZERO, remaining, copyAlignment);
     812            b->CreateStreamCpy(name, tempBuffer, avail, baseAddress, ZERO, remaining, copyAlignment);
    799813            BasicBlock * const copyToFrontEnd = b->GetInsertBlock();
    800814            b->CreateBr(resume);
     
    808822
    809823            PHINode * const phiAvailItemCount = b->CreatePHI(b->getSizeTy(), 3);
    810             phiAvailItemCount->addIncoming(mAvailableItemCount[i], entry);
     824            phiAvailItemCount->addIncoming(avail, entry);
    811825            phiAvailItemCount->addIncoming(temporaryAvailable, copyToBackEnd);
    812826            phiAvailItemCount->addIncoming(temporaryAvailable, copyToFrontEnd);
    813             mAvailableItemCount[i] = phiAvailItemCount;
     827            avail = phiAvailItemCount;
    814828
    815829            PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
     
    819833            accessibleStrides = phiStrides;
    820834        }
    821 
     835        mAvailableItemCount[i] = avail;
    822836        mStreamSetInputBaseAddress[i] = baseBuffer;
    823837        numOfStrides = b->CreateUMin(numOfStrides, accessibleStrides);
     
    833847        const auto & name = output.getName();
    834848        const ProcessingRate & rate = output.getRate();
    835         Value * const ic = b->getProducedItemCount(name);
    836         Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH));
     849        Value * const produced = b->getProducedItemCount(name);
     850
     851        //b->CallPrintInt(getName() + "_" + name + "_produced", produced);
     852
     853        Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(produced, LOG_2_BLOCK_WIDTH));
    837854        assert (baseBuffer->getType()->isPointerTy());
    838         linearlyWritable[i] = b->getLinearlyWritableItems(name, ic);       
     855        linearlyWritable[i] = b->getLinearlyWritableItems(name, produced);
     856
     857        //b->CallPrintInt(getName() + "_" + name + "_linearlyWritable", linearlyWritable[i]);
     858
    839859        outputStrideSize[i] = getStrideSize(b, rate);
    840860        // Is the number of linearly writable items sufficient for a stride?
     
    842862            AllocaInst * const tempBuffer = temporaryOutputBuffer[i];
    843863            Value * writableStrides = b->CreateUDiv(linearlyWritable[i], outputStrideSize[i]);
     864            //b->CallPrintInt(getName() + "_" + name + "_writableStrides", writableStrides);
     865
     866
    844867            // Do we require a temporary buffer to write to?
    845868            if (tempBuffer) {
    846869                assert (tempBuffer->getType() == baseBuffer->getType());
    847870                BasicBlock * const entry = b->GetInsertBlock();
    848                 BasicBlock * const useTemporary = b->CreateBasicBlock(name + "UseTemporary");
     871                BasicBlock * const clearBuffer = b->CreateBasicBlock(name + "ClearTemporaryBuffer");
    849872                BasicBlock * const resume = b->CreateBasicBlock(name + "Resume");
    850873                Value * const requiresCopy = b->CreateICmpEQ(writableStrides, ZERO);
    851 
    852                 b->CreateUnlikelyCondBr(requiresCopy, useTemporary, resume);
    853 
    854                 // Clear the buffer after use since we may end up reusing it within the same stride
    855                 b->SetInsertPoint(useTemporary);
     874                b->CreateUnlikelyCondBr(requiresCopy, clearBuffer, resume);
     875                // Clear the output buffer prior to using it
     876                b->SetInsertPoint(clearBuffer);
    856877                Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize());
    857878                b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);
    858879                b->CreateBr(resume);
    859 
     880                // Select the appropriate buffer / stride #
    860881                b->SetInsertPoint(resume);
    861882                PHINode * const phiBuffer = b->CreatePHI(baseBuffer->getType(), 3);
    862883                phiBuffer->addIncoming(baseBuffer, entry);
    863                 phiBuffer->addIncoming(tempBuffer, useTemporary);
     884                phiBuffer->addIncoming(tempBuffer, clearBuffer);
    864885                baseBuffer = phiBuffer;
    865886                PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
    866887                phiStrides->addIncoming(writableStrides, entry);
    867                 phiStrides->addIncoming(ONE, useTemporary);
     888                phiStrides->addIncoming(ONE, clearBuffer);
    868889                writableStrides = phiStrides;
    869 
    870890            }
    871891            numOfStrides = b->CreateUMin(numOfStrides, writableStrides);
    872892        }
    873         mInitialProducedItemCount[i] = ic;
     893        mInitialProducedItemCount[i] = produced;
    874894        mStreamSetOutputBaseAddress[i] = baseBuffer;
    875895    }
     
    885905        }
    886906        for (unsigned i = 0; i < inputSetCount; ++i) {
    887             const ProcessingRate & rate = mStreamSetInputs[i].getRate();
    888             if (rate.isFixed() && mStreamSetInputs[i].nonDeferred()) {
     907            const auto & input = mStreamSetInputs[i];
     908            const ProcessingRate & rate = input.getRate();
     909            if (rate.isFixed() && input.nonDeferred()) {
    889910                mAvailableItemCount[i] = b->CreateSelect(mIsFinal, mAvailableItemCount[i], b->CreateMul(numOfStrides, inputStrideSize[i]));
    890911            }
     
    896917
    897918    for (unsigned i = 0; i < inputSetCount; ++i) {
    898         const ProcessingRate & rate = mStreamSetInputs[i].getRate();
    899         if (rate.isFixed() && mStreamSetInputs[i].nonDeferred()) {
     919        const auto & input = mStreamSetInputs[i];
     920        const ProcessingRate & rate = input.getRate();
     921        if (rate.isFixed() && input.nonDeferred()) {
    900922            Value * const ic = b->CreateAdd(mInitialProcessedItemCount[i], mAvailableItemCount[i]);
    901             b->setProcessedItemCount(mStreamSetInputs[i].getName(), ic);
     923            b->setProcessedItemCount(input.getName(), ic);
    902924        }
    903925    }
    904926
    905927    for (unsigned i = 0; i < outputSetCount; ++i) {
    906         const ProcessingRate & rate = mStreamSetOutputs[i].getRate();
     928        const auto & output = mStreamSetOutputs[i];
     929        const ProcessingRate & rate = output.getRate();
    907930        if (rate.isFixed()) {
    908             assert (mStreamSetOutputs[i].nonDeferred());
     931            assert (output.nonDeferred());
    909932            Value * const produced = b->CreateMul(numOfStrides, outputStrideSize[i]);
    910933            Value * const ic = b->CreateAdd(mInitialProducedItemCount[i], produced);
    911             b->setProducedItemCount(mStreamSetOutputs[i].getName(), ic);
     934            b->setProducedItemCount(output.getName(), ic);
    912935        }
    913936    }
     
    9901013    Value * hasMoreStrides = b->getTrue();
    9911014    for (unsigned i = 0; i < inputSetCount; ++i) {
    992         const auto & name = mStreamSetInputs[i].getName();
     1015        const Binding & input = mStreamSetInputs[i];
     1016        const auto & name = input.getName();
    9931017        Value * const avail = mInitialAvailableItemCount[i];
    9941018        Value * const processed = b->getProcessedItemCount(name);
    9951019        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    996             b->CreateAssert(b->CreateICmpULE(processed, avail), name + ": processed data cannot exceed available data");
    997         }
    998         Value * const remaining = b->CreateSub(avail, processed);
     1020            b->CreateAssert(b->CreateICmpULE(processed, avail), getName() + "." + name + ": processed data exceeds available data");
     1021        }
     1022        Value * remaining = b->CreateSub(avail, processed);
     1023        if (LLVM_UNLIKELY(input.hasAttribute(AttributeId::LookAhead))) {
     1024            Constant * const lookahead = b->getSize(input.findAttribute(AttributeId::LookAhead).amount());
     1025            remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO);
     1026        }
    9991027        Value * const remainingStrides = b->CreateUDiv(remaining, inputStrideSize[i]);
    10001028        Value * const hasRemainingStrides = b->CreateICmpNE(remainingStrides, ZERO);
     
    10131041            Value * const consumed = b->getConsumedItemCount(name);
    10141042            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1015                 b->CreateAssert(b->CreateICmpULE(consumed, produced), name + ": consumed data cannot exceed produced data");
     1043                b->CreateAssert(b->CreateICmpULE(consumed, produced), getName() + "." + name + ": consumed data exceeds produced data");
    10161044            }
    10171045            Value * const unconsumed = b->CreateSub(produced, consumed);
    10181046            Value * const capacity = b->getCapacity(name);
    10191047            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1020                 b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), name + ": unconsumed data cannot exceed capacity");
     1048                b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), getName() + "." + name + ": unconsumed data exceeds capacity");
    10211049            }
    10221050            Value * const remaining = b->CreateSub(capacity, unconsumed);
     
    11831211        for (const Attribute & attr : output.getAttributes()) {
    11841212            if (attr.isAdd()) {
    1185                 produced = b->CreateAdd(produced, b->getSize(attr.getAmount()));
     1213                produced = b->CreateAdd(produced, b->getSize(attr.amount()));
    11861214            } else if (attr.isRoundUpTo()) {
    1187                 produced = b->CreateRoundUp(produced, b->getSize(attr.getAmount()));
     1215                produced = b->CreateRoundUp(produced, b->getSize(attr.amount()));
    11881216            }
    11891217        }
Note: See TracChangeset for help on using the changeset viewer.