Ignore:
Timestamp:
Dec 20, 2017, 11:42:53 AM (17 months ago)
Author:
nmedfort
Message:

Bug fix for pipeline: it was terminating too early when there was insufficient output space to process all of the input for a kernel.

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
17 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp

    r5626 r5793  
    270270PrintableBits::PrintableBits(const std::unique_ptr<kernel::KernelBuilder> & builder)
    271271: BlockOrientedKernel("PrintableBits", {Binding{builder->getStreamSetTy(1), "bitStream"}}, {Binding{builder->getStreamSetTy(1, 8), "byteStream"}}, {}, {}, {}) {
    272     setNoTerminateAttribute(true);
     272
    273273}
    274274
    275275SelectStream::SelectStream(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned streamIndex)
    276276: BlockOrientedKernel("SelectStream", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(1, 1), "bitStream"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mStreamIndex(streamIndex) {
    277     setNoTerminateAttribute(true);
    278277
    279278}
     
    281280ExpandOrSelectStreams::ExpandOrSelectStreams(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned sizeOutputStreamSet)
    282281: BlockOrientedKernel("ExpandOrSelectStreams", {Binding{builder->getStreamSetTy(sizeInputStreamSet), "bitStreams"}}, {Binding{builder->getStreamSetTy(sizeOutputStreamSet), "outputbitStreams"}}, {}, {}, {}), mSizeInputStreamSet(sizeInputStreamSet), mSizeOutputStreamSet(sizeOutputStreamSet) {
    283     setNoTerminateAttribute(true);
    284282
    285283}
  • icGREP/icgrep-devel/icgrep/kernels/attributes.h

    r5782 r5793  
    105105        /** INPUT/OUTPUT STREAM ATTRIBUTES **/
    106106
     107        Misaligned,
     108
     109        // Assume that we cannot statically compute the alignment of this stream set and
     110        // perform any operations accordingly
     111
    107112        BlockSize, /// NOT DONE
    108113
     
    198203        // a MultiBlock kernel will select the *maximum* input item count as its
    199204        // principal item length and zero-extend the streams accordingly.
    200 
    201         CanTerminate,
    202 
    203         // Informs the pipeline that this kernel can pass a "termination" message to it.
    204         // in which case the pipeline will propogate the message to the subsequent
    205         // kernels and end the program once the final kernel has returned its result.
    206205
    207206    };
     
    253252    friend Attribute LookBehind(const unsigned);
    254253    friend Attribute Deferred();
     254    friend Attribute Misaligned();
    255255    friend Attribute ConditionalRegionBegin();
    256256    friend Attribute ConditionalRegionEnd();
     
    331331}
    332332
     333inline Attribute Misaligned() {
     334    return Attribute(Attribute::KindId::Misaligned, 0);
     335}
     336
    333337inline Attribute ConditionalRegionBegin() {
    334338    return Attribute(Attribute::KindId::ConditionalRegionBegin, 0);
  • icGREP/icgrep-devel/icgrep/kernels/evenodd.cpp

    r5440 r5793  
    2020EvenOddKernel::EvenOddKernel(const std::unique_ptr<kernel::KernelBuilder> & builder)
    2121: BlockOrientedKernel("EvenOdd", {Binding{builder->getStreamSetTy(8, 1), "BasisBits"}}, {Binding{builder->getStreamSetTy(2, 1), "even_odd"}}, {}, {}, {}) {
    22     setNoTerminateAttribute(true);
    2322
    2423}
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5782 r5793  
    229229InvertMatchesKernel::InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder)
    230230: BlockOrientedKernel("Invert",
    231     // Inputs
    232     {Binding{builder->getStreamSetTy(1, 1), "matchedLines"}, Binding{builder->getStreamSetTy(1, 1), "lineBreaks"}},
    233     // Outputs
    234     {Binding{builder->getStreamSetTy(1, 1), "nonMatches"}},
    235     // Input/Output Scalars and internal state
    236     {}, {}, {}) {
    237     setNoTerminateAttribute(true);   
     231// Inputs
     232{Binding{builder->getStreamSetTy(1, 1), "matchedLines"}, Binding{builder->getStreamSetTy(1, 1), "lineBreaks"}},
     233// Outputs
     234{Binding{builder->getStreamSetTy(1, 1), "nonMatches"}},
     235// Input/Output Scalars and internal state
     236{}, {}, {}) {
     237
    238238}
    239239
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5782 r5793  
    6363    bool hasLookahead() const {
    6464        return hasAttribute(AttributeId::LookAhead);
     65    }
     66
     67    bool isMisaligned() const {
     68        return hasAttribute(AttributeId::Misaligned);
    6569    }
    6670
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5783 r5793  
    269269    }
    270270    addScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
    271     addScalar(idb->getInt1Ty(), TERMINATION_SIGNAL);
     271    addScalar(sizeTy, TERMINATION_SIGNAL);
    272272    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    273273        addScalar(sizeTy, mStreamSetOutputs[i].getName() + CONSUMED_ITEM_COUNT_SUFFIX);
     
    628628inline unsigned MultiBlockKernel::getItemAlignment(const Binding & binding) const {
    629629    const auto & rate = binding.getRate();
    630     if (rate.isFixed() && binding.nonDeferred()) {
     630    if (rate.isFixed() && binding.nonDeferred() && !binding.isMisaligned()) {
    631631        const auto r = rate.getRate();
    632632        auto n = (r.numerator() * mStride);
     
    664664    }
    665665
    666     using AttributeId = kernel::Attribute::KindId;
    667666    using RateValue = ProcessingRate::RateValue;
    668667
     
    725724    Constant * const BLOCK_WIDTH_MASK = b->getSize(b->getBitBlockWidth() - 1);
    726725
     726    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     727        Value * terminatedTwice = b->CreateAnd(mIsFinal, b->getTerminationSignal());
     728        Value * unprocessedData = nullptr;
     729        for (unsigned i = 0; i < inputSetCount; i++) {
     730            Value * processed = b->getProcessedItemCount(mStreamSetInputs[i].getName());
     731            Value * const check = b->CreateICmpNE(processed, mAvailableItemCount[i]);
     732            unprocessedData = unprocessedData ? b->CreateOr(unprocessedData, check) : check;
     733        }
     734        b->CreateAssertZero(b->CreateAnd(terminatedTwice, unprocessedData),
     735                            getName() + " was called after its termination with additional input data");
     736        b->CreateAssertZero(terminatedTwice,
     737                            getName() + " was called after its termination");
     738    }
     739
    727740    // Now proceed with creation of the doSegment method.
    728741    BasicBlock * const segmentLoop = b->CreateBasicBlock("SegmentLoop");
     
    745758        const auto & name = input.getName();
    746759        const ProcessingRate & rate = input.getRate();
    747         Value * processed = b->getProcessedItemCount(name);
    748         //b->CallPrintInt(getName() + "_" + name + "_processed", processed);
     760        Value * const processed = b->getProcessedItemCount(name);
    749761
    750762        mInitialProcessedItemCount[i] = processed;
    751763        Value * baseBuffer  = b->getBlockAddress(name, b->CreateLShr(processed, LOG_2_BLOCK_WIDTH));
    752764
    753         if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    754             b->CreateAssert(b->CreateICmpULT(processed, mAvailableItemCount[i]), "processed item count must be less than the available item count");
    755         }
     765        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {           
     766            b->CreateAssert(b->CreateICmpULE(processed, mAvailableItemCount[i]),
     767                            getName() + ": " + name + " processed item count exceeds its available item count");
     768        }
     769
     770        // Ensure that everything between S⌈P/S⌉, and S⌈n*(P + L)/S⌉ is linearly available, where S is the stride size,
     771        // P is the current processed position, L is the lookahead amount and n ∈ ℤ+.
    756772
    757773        Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], processed);
    758         //b->CallPrintInt(getName() + "_" + name + "_unprocessed", unprocessed);
    759 
    760774        Value * avail = b->getLinearlyAccessibleItems(name, processed, unprocessed);
    761         //b->CallPrintInt(getName() + "_" + name + "_avail", avail);
    762 
    763 
    764         // Ensure that everything between S⌈P/S⌉, and S⌈n*(P + L)/S⌉ is linearly available, where S is
    765         // the stride size, P is the current processed position, L is the lookahead amount and n ∈ ℤ+.
    766 
    767775        Value * remaining = avail;
    768776        if (LLVM_UNLIKELY(input.hasLookahead())) {
    769777            Constant * const lookahead = b->getSize(input.getLookahead());
    770778            remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO);
    771             //b->CallPrintInt(getName() + "_" + name + "_remaining", remaining);
    772779        }
    773780
    774781        inputStrideSize[i] = getStrideSize(b, rate);
    775 
    776782        Value * accessibleStrides = b->CreateUDiv(remaining, inputStrideSize[i]);
    777 
    778         //b->CallPrintInt(getName() + "_" + name + "_accessibleStrides", accessibleStrides);
    779 
    780783        AllocaInst * const tempBuffer = temporaryInputBuffer[i];
    781784        if (tempBuffer) {
     
    795798            Value * const temporarySize = b->CreateTrunc(b->CreateMul(arraySize, b->getInt64(mStride)), unprocessed->getType());
    796799            Value * const temporaryAvailable = b->CreateUMin(unprocessed, temporarySize);
    797             if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    798                 b->CreateAssert(b->CreateICmpULE(avail, temporaryAvailable),
    799                                 "linearly available item count cannot exceed the temporarily available item count");
    800             }
    801800            Value * const offset = b->CreateAnd(processed, BLOCK_WIDTH_MASK);
    802801            Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), arraySize);
     
    816815
    817816            b->SetInsertPoint(resume);
    818             PHINode * const bufferPtr = b->CreatePHI(baseBuffer->getType(), 3);
    819             bufferPtr->addIncoming(baseBuffer , entry);
     817            PHINode * const bufferPtr = b->CreatePHI(baseBuffer->getType(), 4);
     818            bufferPtr->addIncoming(baseBuffer, entry);
    820819            bufferPtr->addIncoming(tempBuffer, copyToBackEnd);
    821820            bufferPtr->addIncoming(tempBuffer, copyToFrontEnd);
    822821            baseBuffer = bufferPtr;
    823822
    824             PHINode * const phiAvailItemCount = b->CreatePHI(b->getSizeTy(), 3);
     823            PHINode * const phiAvailItemCount = b->CreatePHI(b->getSizeTy(), 4);
    825824            phiAvailItemCount->addIncoming(avail, entry);
    826825            phiAvailItemCount->addIncoming(temporaryAvailable, copyToBackEnd);
     
    828827            avail = phiAvailItemCount;
    829828
    830             PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
     829            PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 4);
    831830            phiStrides->addIncoming(accessibleStrides, entry);
    832831            phiStrides->addIncoming(temporaryStrides, copyToBackEnd);
     
    849848        const ProcessingRate & rate = output.getRate();
    850849        Value * const produced = b->getProducedItemCount(name);
    851 
    852         //b->CallPrintInt(getName() + "_" + name + "_produced", produced);
    853 
    854850        Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(produced, LOG_2_BLOCK_WIDTH));
    855851        assert (baseBuffer->getType()->isPointerTy());
    856852        linearlyWritable[i] = b->getLinearlyWritableItems(name, produced);
    857 
    858         //b->CallPrintInt(getName() + "_" + name + "_linearlyWritable", linearlyWritable[i]);
    859 
    860853        outputStrideSize[i] = getStrideSize(b, rate);
    861854        // Is the number of linearly writable items sufficient for a stride?
     
    863856            AllocaInst * const tempBuffer = temporaryOutputBuffer[i];
    864857            Value * writableStrides = b->CreateUDiv(linearlyWritable[i], outputStrideSize[i]);
    865             //b->CallPrintInt(getName() + "_" + name + "_writableStrides", writableStrides);
    866 
    867 
    868858            // Do we require a temporary buffer to write to?
    869859            if (tempBuffer) {
    870860                assert (tempBuffer->getType() == baseBuffer->getType());
    871861                BasicBlock * const entry = b->GetInsertBlock();
    872                 BasicBlock * const clearBuffer = b->CreateBasicBlock(name + "ClearTemporaryBuffer");
     862                BasicBlock * const prepareTempBuffer = b->CreateBasicBlock(name + "PrepareTempBuffer");
    873863                BasicBlock * const resume = b->CreateBasicBlock(name + "Resume");
    874864                Value * const requiresCopy = b->CreateICmpEQ(writableStrides, ZERO);
    875                 b->CreateUnlikelyCondBr(requiresCopy, clearBuffer, resume);
     865                b->CreateUnlikelyCondBr(requiresCopy, prepareTempBuffer, resume);
    876866                // Clear the output buffer prior to using it
    877                 b->SetInsertPoint(clearBuffer);
     867                b->SetInsertPoint(prepareTempBuffer);
    878868                Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize());
    879869                b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);
     
    883873                PHINode * const phiBuffer = b->CreatePHI(baseBuffer->getType(), 3);
    884874                phiBuffer->addIncoming(baseBuffer, entry);
    885                 phiBuffer->addIncoming(tempBuffer, clearBuffer);
     875                phiBuffer->addIncoming(tempBuffer, prepareTempBuffer);
    886876                baseBuffer = phiBuffer;
    887877                PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
    888878                phiStrides->addIncoming(writableStrides, entry);
    889                 phiStrides->addIncoming(ONE, clearBuffer);
     879                phiStrides->addIncoming(ONE, prepareTempBuffer);
    890880                writableStrides = phiStrides;
    891881            }
     
    964954            continue;
    965955        }
     956
    966957        Value * const baseBuffer = mStreamSetOutputBaseAddress[i];
    967958        assert ("stack corruption likely" && (tempBuffer->getType() == baseBuffer->getType()));
     
    994985    //  We've dealt with the partial block processing and copied information back into the
    995986    //  actual buffers.  If this isn't the final block, loop back for more multiblock processing.
    996     if (hasNoTerminateAttribute()) {
    997         b->CreateCondBr(mIsFinal, segmentDone, strideDone);
    998     } else {
    999         BasicBlock * const setTermination = b->CreateBasicBlock("setTermination");
    1000         b->CreateCondBr(mIsFinal, setTermination, strideDone);
    1001 
    1002         b->SetInsertPoint(setTermination);
    1003         b->setTerminationSignal();
    1004         b->CreateBr(segmentDone);       
    1005     }
     987    BasicBlock * const setTermination = b->CreateBasicBlock("setTermination");
     988    b->CreateCondBr(mIsFinal, setTermination, strideDone);
     989
     990    b->SetInsertPoint(setTermination);
     991    b->setTerminationSignal();
     992    b->CreateBr(segmentDone);
    1006993
    1007994    /// STRIDE DONE
    1008995    strideDone->moveAfter(b->GetInsertBlock());
    1009996    b->SetInsertPoint(strideDone);
    1010 
    1011     b->CreateAssertZero(mIsFinal, "stride done cannot process the final block");
    1012997
    1013998    // do we have enough data for another stride?
     
    10191004        Value * const processed = b->getProcessedItemCount(name);
    10201005        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1021             b->CreateAssert(b->CreateICmpULE(processed, avail), getName() + "." + name + ": processed data exceeds available data");
     1006            b->CreateAssert(b->CreateICmpULE(processed, avail), getName() + ": " + name + " processed data exceeds available data");
    10221007        }
    10231008        Value * remaining = b->CreateSub(avail, processed);
    1024         if (LLVM_UNLIKELY(input.hasAttribute(AttributeId::LookAhead))) {
    1025             Constant * const lookahead = b->getSize(input.findAttribute(AttributeId::LookAhead).amount());
     1009        if (LLVM_UNLIKELY(input.hasLookahead())) {
     1010            Constant * const lookahead = b->getSize(input.getLookahead());
    10261011            remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO);
    10271012        }
     
    10421027            Value * const consumed = b->getConsumedItemCount(name);
    10431028            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1044                 b->CreateAssert(b->CreateICmpULE(consumed, produced), getName() + "." + name + ": consumed data exceeds produced data");
     1029                b->CreateAssert(b->CreateICmpULE(consumed, produced),
     1030                                getName() + ": " + name + " consumed data exceeds produced data");
    10451031            }
    10461032            Value * const unconsumed = b->CreateSub(produced, consumed);
    10471033            Value * const capacity = b->getCapacity(name);
    10481034            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1049                 b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), getName() + "." + name + ": unconsumed data exceeds capacity");
     1035                b->CreateAssert(b->CreateICmpULE(unconsumed, capacity),
     1036                                getName() + ": " + name + " unconsumed data exceeds capacity");
    10501037            }
    10511038            Value * const remaining = b->CreateSub(capacity, unconsumed);
    10521039            Value * const remainingStrides = b->CreateUDiv(remaining, outputStrideSize[i]);
    10531040            Value * const hasRemainingStrides = b->CreateICmpNE(remainingStrides, ZERO);
     1041
    10541042            hasMoreStrides = b->CreateAnd(hasMoreStrides, hasRemainingStrides);
    10551043        }
    10561044        // Do copybacks if necessary.
    10571045        if (mStreamSetOutputBuffers[i]->supportsCopyBack() && requiresCopyBack(rate)) {
    1058             b->CreateCopyBack(name, mInitialProducedItemCount[i], produced);
    1059         }
    1060     }
    1061 
    1062     // b->CreateAssertZero(b->CreateOr(b->CreateNot(initiallyFinal), hasMoreStrides), getName() + " does not have enough output space for the final stride");
     1046            BasicBlock * const copyBack = b->CreateBasicBlock(name + "CopyBack");
     1047            BasicBlock * const done = b->CreateBasicBlock(name + "CopyBackDone");
     1048
     1049            Value * const bufferSize = b->getBufferedSize(name);
     1050            Value * const prior = b->CreateURem(mInitialProducedItemCount[i], bufferSize);
     1051            Value * const current = b->CreateURem(produced, bufferSize);
     1052            b->CreateUnlikelyCondBr(b->CreateICmpUGT(prior, current), copyBack, done);
     1053
     1054            b->SetInsertPoint(copyBack);
     1055            Value * const baseAddress = b->getBaseAddress(name);
     1056            const auto copyAlignment = getItemAlignment(mStreamSetOutputs[i]);
     1057            b->CreateStreamCpy(name, baseAddress, ZERO, baseAddress, bufferSize, current, copyAlignment);
     1058            b->CreateBr(done);
     1059
     1060            b->SetInsertPoint(done);
     1061        }
     1062    }
    10631063
    10641064    b->CreateCondBr(hasMoreStrides, segmentLoop, segmentDone);
     
    13441344    }
    13451345
    1346     writeFinalBlockMethod(b, getRemainingItems(b));
     1346    Value * const remainingItems = getRemainingItems(b);
     1347
     1348//    b->CallPrintInt(getName() + "_remainingItems", remainingItems);
     1349
     1350    writeFinalBlockMethod(b, remainingItems);
    13471351
    13481352    b->CreateBr(segmentDone);
     
    15511555, mCurrentMethod(nullptr)
    15521556, mAvailablePrincipalItemCount(nullptr)
    1553 , mNoTerminateAttribute(false)
    15541557, mIsGenerated(false)
    15551558, mStride(0)
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5782 r5793  
    110110    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
    111111
    112     bool hasNoTerminateAttribute() const {
    113         return mNoTerminateAttribute;
    114     }
    115 
    116112    StreamPort getStreamPort(const std::string & name) const;
    117113
     
    220216          Bindings && scalar_outputs,
    221217          Bindings && internal_scalars);
    222 
    223     void setNoTerminateAttribute(const bool noTerminate = true) {
    224         mNoTerminateAttribute = noTerminate;
    225     }
    226218
    227219    llvm::Value * getPrincipalItemCount() const {
     
    297289    llvm::Function *                    mCurrentMethod;
    298290    llvm::Value *                       mAvailablePrincipalItemCount;
    299     bool                                mNoTerminateAttribute;
    300291    bool                                mIsGenerated;
    301292    unsigned                            mStride;
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5782 r5793  
    118118
    119119Value * KernelBuilder::getTerminationSignal() {
    120     if (mKernel->hasNoTerminateAttribute()) {
    121         return getFalse();
    122     }
    123     return getScalarField(Kernel::TERMINATION_SIGNAL);
     120    return CreateICmpNE(getScalarField(Kernel::TERMINATION_SIGNAL), getSize(0));
    124121}
    125122
    126123void KernelBuilder::setTerminationSignal(llvm::Value * const value) {
    127     assert (!mKernel->hasNoTerminateAttribute());
    128124    assert (value->getType() == getInt1Ty());
    129125    if (codegen::DebugOptionIsSet(codegen::TraceCounts)) {
    130126        CallPrintIntToStderr(mKernel->getName() + ": setTerminationSignal", value);
    131127    }
    132     setScalarField(Kernel::TERMINATION_SIGNAL, value);
     128    setScalarField(Kernel::TERMINATION_SIGNAL, CreateZExt(value, getSizeTy()));
    133129}
    134130
     
    142138    return buf->getLinearlyWritableItems(this, getStreamHandle(name), fromPosition, getConsumedItemCount(name), reverse);
    143139}
    144 
    145 //Value * KernelBuilder::getLinearlyCopyableItems(const std::string & name, Value * fromPosition, bool reverse) {
    146 //    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
    147 //    return buf->getLinearlyCopyableItems(this, getStreamHandle(name), fromPosition, reverse);
    148 //}
    149140
    150141/** ------------------------------------------------------------------------------------------------------------- *
     
    196187
    197188    const StreamSetBuffer * const buf = mKernel->getAnyStreamSetBuffer(name);
     189
    198190    const auto itemWidth = getItemWidth(buf->getBaseType());
    199191    assert ("invalid item width" && is_power_2(itemWidth));
     
    202194    // (w.r.t the stream copy) would be n*m. By taking this into account we can optimize and simplify the copy code.
    203195    const auto fieldWidth = getFieldWidth(itemWidth * itemAlignment, blockWidth);
    204 
    205 //    CallPrintInt(mKernel->getName() + "_" + name + "_target", target);
    206 //    CallPrintInt(mKernel->getName() + "_" + name + "_targetOffset", targetOffset);
    207 //    CallPrintInt(mKernel->getName() + "_" + name + "_source", source);
    208 //    CallPrintInt(mKernel->getName() + "_" + name + "_sourceOffset", sourceOffset);
    209 //    CallPrintInt(mKernel->getName() + "_" + name + "_itemsToCopy", itemsToCopy);
    210 
     196    const auto alignment = (fieldWidth + 7) / 8;
    211197    if (LLVM_LIKELY(itemWidth < fieldWidth)) {
    212         Constant * const factor = getSize(fieldWidth / itemWidth);
    213         CreateAssertZero(CreateURem(targetOffset, factor), "target offset is not a multiple of its field width");
    214         targetOffset = CreateUDiv(targetOffset, factor);
    215         CreateAssertZero(CreateURem(sourceOffset, factor), "source offset is not a multiple of its field width");
    216         sourceOffset = CreateUDiv(sourceOffset, factor);
     198        const auto factor = fieldWidth / itemWidth;
     199        Constant * const FACTOR = getSize(factor);
     200        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     201            ConstantInt * const ALIGNMENT = getSize(alignment);
     202            const auto kernelName = mKernel->getName()+ ": " + name;
     203            CreateAssertZero(CreateURem(CreatePtrToInt(target, getSizeTy()), ALIGNMENT), kernelName + " target is misaligned (" + std::to_string(alignment) + ")");
     204            CreateAssertZero(CreateURem(targetOffset, FACTOR), kernelName + " target offset is misaligned (" + std::to_string(factor) + ")");
     205            CreateAssertZero(CreateURem(CreatePtrToInt(source, getSizeTy()), ALIGNMENT), kernelName + " source is misaligned (" + std::to_string(alignment) + ")");
     206            CreateAssertZero(CreateURem(sourceOffset, FACTOR), kernelName + " source offset is misaligned (" + std::to_string(factor) + ")");
     207        }
     208        targetOffset = CreateUDiv(targetOffset, FACTOR);
     209        sourceOffset = CreateUDiv(sourceOffset, FACTOR);
    217210    }
    218211
     
    240233
    241234    */
    242 
    243     const auto alignment = (fieldWidth + 7) / 8;
    244235
    245236    Type * const fieldWidthTy = getIntNTy(fieldWidth);
     
    396387}
    397388
    398 void KernelBuilder::CreateCopyBack(const std::string & name, llvm::Value * from, llvm::Value * to) {
    399     const StreamSetBuffer * const buf = mKernel->getAnyStreamSetBuffer(name);
    400     buf->genCopyBackLogic(this, getStreamHandle(name), from, to, name);
    401 }
    402 
    403389Value * KernelBuilder::getConsumerLock(const std::string & name) {
    404390    return getScalarField(name + Kernel::CONSUMER_SUFFIX);
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r5782 r5793  
    101101    llvm::Value * getBlockAddress(const std::string & name, llvm::Value * const blockIndex);
    102102
    103     void CreateCopyBack(const std::string & name, llvm::Value * from, llvm::Value * to);
    104 
    105103    void setBaseAddress(const std::string & name, llvm::Value * addr);
    106104
  • icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp

    r5782 r5793  
    5858    PabloAST * const LF = pb.createExtract(getInput(1), ZERO, "LF");
    5959    PabloAST * const CR = ccc.compileCC(makeCC(0x0D));
    60     PabloAST * const LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
     60    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeCC(0x0A, 0x0D), pb);
    6161    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
    6262
     
    9393    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
    9494
    95     PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1));
     95    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
    9696    pb.createAssign(pb.createExtract(getOutput(0), ZERO), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
    9797}
  • icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp

    r5755 r5793  
    175175LZ4ByteStreamDecoderKernel::LZ4ByteStreamDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, size_t bufferSize)
    176176: MultiBlockKernel("lz4ByteStreamDecoder",
    177     // Inputs
    178     {Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes"},
    179      Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes"},
    180      Binding{iBuilder->getStreamSetTy(1, 8), "inputStream", FixedRate(), { Deferred(), LookBehind(65536) }}},
    181     // Outputs
    182     {Binding{iBuilder->getStreamSetTy(1, 8), "outputStream", UnknownRate()}},
    183     // Arguments
    184     {},
    185     {},
    186     {}),
    187  mBufferSize(bufferSize) {
    188     setNoTerminateAttribute(true);
     177// Inputs
     178{Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes"},
     179 Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes"},
     180 Binding{iBuilder->getStreamSetTy(1, 8), "inputStream", FixedRate(), { Deferred(), Misaligned(), LookBehind(65536) }}},
     181// Outputs
     182{Binding{iBuilder->getStreamSetTy(1, 8), "outputStream", UnknownRate()}},
     183// Arguments
     184{},
     185{},
     186{})
     187, mBufferSize(bufferSize) {
     188
    189189}
     190
     191
  • icGREP/icgrep-devel/icgrep/kernels/lz4_index_decoder.cpp

    r5755 r5793  
    684684LZ4IndexDecoderKernel::LZ4IndexDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & b)
    685685: BlockOrientedKernel("lz4IndexDecoder",
    686     // Inputs
    687     {Binding{b->getStreamSetTy(1, 8), "byteStream"},
    688      Binding{b->getStreamSetTy(1, 1), "extenders"}},
    689     // Outputs: literal start, literal length, match offset, match length
    690     {Binding{b->getStreamSetTy(2, 32), "literalIndexes", UnknownRate()},
    691      Binding{b->getStreamSetTy(2, 32), "matchIndexes", RateEqualTo("literalIndexes")}},
    692     // Arguments
    693     {Binding{b->getInt1Ty(), "hasBlockChecksum"}},
    694     {},
    695     // Internal states:
    696     {Binding{b->getInt32Ty(), "BlockNo"},
    697      Binding{b->getInt8Ty(), "State"},
    698      Binding{b->getInt32Ty(), "LZ4BlockStart"},
    699      Binding{b->getInt32Ty(), "LZ4BlockEnd"},
    700      Binding{b->getInt32Ty(), "BytesToSkip"},
    701      Binding{b->getInt32Ty(), "TempLength"},
    702      Binding{b->getInt32Ty(), "TempCount"},
    703      Binding{b->getInt32Ty(), "LiteralStart"},
    704      Binding{b->getInt32Ty(), "LiteralLength"},
    705      Binding{b->getInt32Ty(), "MatchOffset"},
    706      Binding{b->getInt32Ty(), "MatchLength"}})
     686// Inputs
     687{Binding{b->getStreamSetTy(1, 8), "byteStream", FixedRate(), Misaligned()},
     688 Binding{b->getStreamSetTy(1, 1), "extenders"}},
     689// Outputs: literal start, literal length, match offset, match length
     690{Binding{b->getStreamSetTy(2, 32), "literalIndexes", UnknownRate()},
     691 Binding{b->getStreamSetTy(2, 32), "matchIndexes", RateEqualTo("literalIndexes")}},
     692// Arguments
     693{Binding{b->getInt1Ty(), "hasBlockChecksum"}},
     694{},
     695// Internal states:
     696{Binding{b->getInt32Ty(), "BlockNo"},
     697 Binding{b->getInt8Ty(), "State"},
     698 Binding{b->getInt32Ty(), "LZ4BlockStart"},
     699 Binding{b->getInt32Ty(), "LZ4BlockEnd"},
     700 Binding{b->getInt32Ty(), "BytesToSkip"},
     701 Binding{b->getInt32Ty(), "TempLength"},
     702 Binding{b->getInt32Ty(), "TempCount"},
     703 Binding{b->getInt32Ty(), "LiteralStart"},
     704 Binding{b->getInt32Ty(), "LiteralLength"},
     705 Binding{b->getInt32Ty(), "MatchOffset"},
     706 Binding{b->getInt32Ty(), "MatchLength"}})
    707707, wordWidth{b->getSizeTy()->getBitWidth()} {
    708     setNoTerminateAttribute(true);
    709 }
     708
     709}
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5757 r5793  
    208208    {Binding{b->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {}),
    209209  mAligned(aligned) {
    210     setNoTerminateAttribute(true);
    211 }
    212 
    213 }
     210    if (!aligned) {
     211        mStreamSetInputs[0].addAttribute(Misaligned());
     212    }
     213}
     214}
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5755 r5793  
    1515namespace kernel {
    1616
    17 Value * StdOutKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * const numOfStrides) {
    18     Value * codeUnitBuffer = iBuilder->getInputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0));
    19     codeUnitBuffer = iBuilder->CreatePointerCast(codeUnitBuffer, iBuilder->getInt8PtrTy());
     17Value * StdOutKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) {
     18    Value * codeUnitBuffer = b->getInputStreamBlockPtr("codeUnitBuffer", b->getInt32(0));
     19    codeUnitBuffer = b->CreatePointerCast(codeUnitBuffer, b->getInt8PtrTy());
    2020    Value * bytesToDo = mAvailableItemCount[0];
    2121    if (LLVM_UNLIKELY(mCodeUnitWidth > 8)) {
    22         bytesToDo = iBuilder->CreateMul(bytesToDo, iBuilder->getSize(mCodeUnitWidth / 8));
     22        bytesToDo = b->CreateMul(bytesToDo, b->getSize(mCodeUnitWidth / 8));
    2323    } else if (LLVM_UNLIKELY(mCodeUnitWidth < 8)) {
    24         bytesToDo = iBuilder->CreateUDiv(bytesToDo, iBuilder->getSize(8 / mCodeUnitWidth));
     24        bytesToDo = b->CreateUDiv(bytesToDo, b->getSize(8 / mCodeUnitWidth));
    2525    }
    26     iBuilder->CreateWriteCall(iBuilder->getInt32(1), codeUnitBuffer, bytesToDo);
     26    b->CreateWriteCall(b->getInt32(1), codeUnitBuffer, bytesToDo);
    2727    return numOfStrides;
    2828}
    2929
    30 StdOutKernel::StdOutKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned codeUnitWidth)
    31 : MultiBlockKernel("stdout", {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "codeUnitBuffer"}}, {}, {}, {}, {})
     30StdOutKernel::StdOutKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned codeUnitWidth)
     31: MultiBlockKernel("stdout", {Binding{b->getStreamSetTy(1, codeUnitWidth), "codeUnitBuffer"}}, {}, {}, {}, {})
    3232, mCodeUnitWidth(codeUnitWidth) {
    33     setNoTerminateAttribute(true);
    3433    // setKernelStride(getpagesize());
    3534}
    3635
    37 void FileSink::generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    38     BasicBlock * setTerminationOnFailure = iBuilder->CreateBasicBlock("setTerminationOnFailure");
    39     BasicBlock * fileSinkInitExit = iBuilder->CreateBasicBlock("fileSinkInitExit");
    40     Value * fileName = iBuilder->getScalarField("fileName");
    41     Value * fileNameLength = iBuilder->CreateStrlenCall(fileName);
     36void FileSink::generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & b) {
     37    BasicBlock * setTerminationOnFailure = b->CreateBasicBlock("setTerminationOnFailure");
     38    BasicBlock * fileSinkInitExit = b->CreateBasicBlock("fileSinkInitExit");
     39    Value * fileName = b->getScalarField("fileName");
     40    Value * fileNameLength = b->CreateStrlenCall(fileName);
    4241    // Make a temporary file name template with the characters "XXXXXX" appended
    4342    // as required by mkstemp.
    44     Constant * suffixPlusNullLength = iBuilder->getSize(7);
    45     Value * tmpFileNamePtr = iBuilder->CreatePointerCast(iBuilder->CreateMalloc(iBuilder->CreateAdd(fileNameLength, suffixPlusNullLength)), iBuilder->getInt8PtrTy());
    46     iBuilder->setScalarField("tmpFileName", tmpFileNamePtr);
    47     iBuilder->CreateMemCpy(tmpFileNamePtr, fileName, fileNameLength, 1);
     43    Constant * suffixPlusNullLength = b->getSize(7);
     44    Value * tmpFileNamePtr = b->CreatePointerCast(b->CreateMalloc(b->CreateAdd(fileNameLength, suffixPlusNullLength)), b->getInt8PtrTy());
     45    b->setScalarField("tmpFileName", tmpFileNamePtr);
     46    b->CreateMemCpy(tmpFileNamePtr, fileName, fileNameLength, 1);
    4847#ifdef BACKUP_OLDFILE
    49     iBuilder->CreateMemCpy(iBuilder->CreateGEP(tmpFileNamePtr, fileNameLength), iBuilder->GetString(".saved"), suffixPlusNullLength, 1);
    50     iBuilder->CreateRenameCall(fileName, tmpFileNamePtr);
     48    b->CreateMemCpy(b->CreateGEP(tmpFileNamePtr, fileNameLength), b->GetString(".saved"), suffixPlusNullLength, 1);
     49    b->CreateRenameCall(fileName, tmpFileNamePtr);
    5150#else
    52     iBuilder->CreateUnlinkCall(fileName);
     51    b->CreateUnlinkCall(fileName);
    5352#endif
    54     iBuilder->CreateMemCpy(iBuilder->CreateGEP(tmpFileNamePtr, fileNameLength), iBuilder->GetString("XXXXXX"), suffixPlusNullLength, 1);
    55     Value * fileDes = iBuilder->CreateMkstempCall(tmpFileNamePtr);
    56     iBuilder->setScalarField("fileDes", fileDes);
    57     Value * failure = iBuilder->CreateICmpEQ(fileDes, iBuilder->getInt32(-1));
    58     iBuilder->CreateCondBr(failure, setTerminationOnFailure, fileSinkInitExit);
    59     iBuilder->SetInsertPoint(setTerminationOnFailure);
    60     iBuilder->setTerminationSignal();
    61     iBuilder->CreateBr(fileSinkInitExit);
    62     iBuilder->SetInsertPoint(fileSinkInitExit);
     53    b->CreateMemCpy(b->CreateGEP(tmpFileNamePtr, fileNameLength), b->GetString("XXXXXX"), suffixPlusNullLength, 1);
     54    Value * fileDes = b->CreateMkstempCall(tmpFileNamePtr);
     55    b->setScalarField("fileDes", fileDes);
     56    Value * failure = b->CreateICmpEQ(fileDes, b->getInt32(-1));
     57    b->CreateCondBr(failure, setTerminationOnFailure, fileSinkInitExit);
     58
     59    b->SetInsertPoint(setTerminationOnFailure);
     60    b->setTerminationSignal();
     61    b->CreateBr(fileSinkInitExit);
     62
     63    b->SetInsertPoint(fileSinkInitExit);
    6364}
    6465
    65 Value * FileSink::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & iBuilder, Value * const numOfStrides) {
    66     BasicBlock * const closeFile = iBuilder->CreateBasicBlock("closeFile");
    67     BasicBlock * const fileOutExit = iBuilder->CreateBasicBlock("fileOutExit");
    68 
    69     Value * const fileDes = iBuilder->getScalarField("fileDes");
    70     Value * codeUnitBuffer = iBuilder->getInputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0));
    71     codeUnitBuffer = iBuilder->CreatePointerCast(codeUnitBuffer, iBuilder->getInt8PtrTy());
     66Value * FileSink::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, Value * const numOfStrides) {
     67    Value * const fileDes = b->getScalarField("fileDes");
     68    Value * codeUnitBuffer = b->getInputStreamBlockPtr("codeUnitBuffer", b->getInt32(0));
     69    codeUnitBuffer = b->CreatePointerCast(codeUnitBuffer, b->getInt8PtrTy());
    7270    Value * bytesToDo = mAvailableItemCount[0];
    7371    if (LLVM_UNLIKELY(mCodeUnitWidth > 8)) {
    74         bytesToDo = iBuilder->CreateMul(bytesToDo, iBuilder->getSize(mCodeUnitWidth / 8));
     72        bytesToDo = b->CreateMul(bytesToDo, b->getSize(mCodeUnitWidth / 8));
    7573    } else if (LLVM_UNLIKELY(mCodeUnitWidth < 8)) {
    76         bytesToDo = iBuilder->CreateUDiv(bytesToDo, iBuilder->getSize(8 / mCodeUnitWidth));
     74        bytesToDo = b->CreateUDiv(bytesToDo, b->getSize(8 / mCodeUnitWidth));
    7775    }   
    78     iBuilder->CreateWriteCall(fileDes, codeUnitBuffer, bytesToDo);
    79     iBuilder->CreateUnlikelyCondBr(mIsFinal, closeFile, fileOutExit);
    80 
    81     iBuilder->SetInsertPoint(closeFile);   
    82     iBuilder->CreateCloseCall(fileDes);
    83     Value * newFileNamePtr = iBuilder->getScalarField("fileName");
    84     Value * tmpFileNamePtr = iBuilder->getScalarField("tmpFileName");
    85     iBuilder->CreateRenameCall(tmpFileNamePtr, newFileNamePtr);
    86     iBuilder->CreateFree(tmpFileNamePtr);   
    87     iBuilder->CreateBr(fileOutExit);
    88    
    89     iBuilder->SetInsertPoint(fileOutExit);
     76    b->CreateWriteCall(fileDes, codeUnitBuffer, bytesToDo);
    9077    return numOfStrides;
    9178}
    9279
    93 FileSink::FileSink(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned codeUnitWidth)
     80void FileSink::generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
     81    Value * const fileDes = b->getScalarField("fileDes");
     82    b->CreateCloseCall(fileDes);
     83    Value * newFileNamePtr = b->getScalarField("fileName");
     84    Value * tmpFileNamePtr = b->getScalarField("tmpFileName");
     85    b->CreateRenameCall(tmpFileNamePtr, newFileNamePtr);
     86    b->CreateFree(tmpFileNamePtr);
     87}
     88
     89FileSink::FileSink(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned codeUnitWidth)
    9490: MultiBlockKernel("filesink" + std::to_string(codeUnitWidth),
    95 {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "codeUnitBuffer"}},
     91{Binding{b->getStreamSetTy(1, codeUnitWidth), "codeUnitBuffer"}},
    9692{},
    97 {Binding{iBuilder->getInt8PtrTy(), "fileName"}}, {}, {Binding{iBuilder->getInt8PtrTy(), "tmpFileName"}, Binding{iBuilder->getInt32Ty(), "fileDes"}})
     93{Binding{b->getInt8PtrTy(), "fileName"}}, {}, {Binding{b->getInt8PtrTy(), "tmpFileName"}, Binding{b->getInt32Ty(), "fileDes"}})
    9894, mCodeUnitWidth(codeUnitWidth) {
    9995    // setKernelStride(getpagesize());
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.h

    r5755 r5793  
    2828    void generateInitializeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
    2929    llvm::Value * generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * const numOfStrides) override;
     30    void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) override;
    3031private:
    3132    const unsigned mCodeUnitWidth;
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5782 r5793  
    163163    }
    164164    consumed = b->CreateURem(consumed, bufferSize);
    165     Value * const limit = b->CreateSelect(b->CreateICmpULE(consumed, fromPosition), bufferSize, consumed);
     165    Constant * capacity = bufferSize;
     166    if (mOverflowBlocks) {
     167        capacity = ConstantInt::get(fromPosition->getType(), (mBufferBlocks + mOverflowBlocks) * b->getStride());
     168    }
     169    Value * const limit = b->CreateSelect(b->CreateICmpULE(consumed, fromPosition), capacity, consumed);
    166170    return b->CreateNUWSub(limit, fromPosition);
    167171}
     
    219223}
    220224
    221 void StreamSetBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
    222     report_fatal_error("Copy back not supported for this buffer type:" + Name);
    223 }
    224 
    225225// Source File Buffer
    226226
     
    338338}
    339339
    340 Value * CircularBuffer::getLinearlyCopyableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
    341 //    Constant * bufSize = ConstantInt::get(priorProduced->getType(), mBufferBlocks * b->getBitBlockWidth());
    342 //    Value * from = b->CreateURem(fromPosition, bufSize);
    343 //    Value * avail = b->CreateURem(availItems, bufSize);
    344 //    Value * wraparound = b->CreateICmpUGT(from, avail);
    345 
    346 
    347     return nullptr;
    348 }
    349 
    350340Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
    351341    Value * ptr = getBaseAddress(b, handle);
     
    372362}
    373363
    374 Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * consumed, bool reverse) const {
    375     Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, consumed, reverse);
    376     if (reverse) return writableProper;
    377     return b->CreateAdd(writableProper, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
    378 }
    379 
    380 void CircularCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
    381     assert (priorProduced->getType() == newProduced->getType());
    382     Constant * bufSize = ConstantInt::get(priorProduced->getType(), mBufferBlocks * b->getBitBlockWidth());
    383     Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
    384     Value * newBufPos = b->CreateURem(newProduced, bufSize);
    385     BasicBlock * copyBack = b->CreateBasicBlock(Name + "_circularCopyBack");
    386     BasicBlock * done = b->CreateBasicBlock(Name + "_circularCopyBackDone");
    387     Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
    388     b->CreateCondBr(wraparound, copyBack, done);
    389 
    390     b->SetInsertPoint(copyBack);
    391     Value * const baseAddress = getBaseAddress(b, handle);
    392     Value * overflowAddress = b->CreateGEP(baseAddress, b->getInt32(mBufferBlocks));
    393     // copyStream(b, baseAddress, b->getSize(0), overflowAddress, b->getSize(0), newBufPos);
    394     createBlockAlignedCopy(b, baseAddress, overflowAddress, newBufPos);
    395     b->CreateBr(done);
    396 
    397     b->SetInsertPoint(done);
    398 }
    399 
    400364
    401365// SwizzledCopybackBuffer Buffer
    402 
    403366void SwizzledCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
    404367    Type * const ty = getType();
     
    450413}
    451414
    452 Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
    453     Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, consumed, reverse);
    454     if (reverse) return writableProper;
    455     return b->CreateAdd(writableProper, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
    456 }
    457 
    458 void SwizzledCopybackBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProduced, Value * newProduced, const std::string Name) const {
    459     assert (priorProduced->getType() == newProduced->getType());
    460     Constant * bufSize = ConstantInt::get(priorProduced->getType(), mBufferBlocks * b->getBitBlockWidth());
    461     Value * priorBufPos = b->CreateURem(priorProduced, bufSize);
    462     Value * newBufPos = b->CreateURem(newProduced, bufSize);
    463     BasicBlock * copyBack = b->CreateBasicBlock(Name + "_swizzledCopyBack");
    464     BasicBlock * done = b->CreateBasicBlock(Name + "_swizzledCopyBackDone");
    465     Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
    466     b->CreateCondBr(wraparound, copyBack, done);
    467     b->SetInsertPoint(copyBack);
    468     Value * overFlowAreaPtr = b->CreateGEP(handle, b->getSize(mBufferBlocks));
    469     createBlockAlignedCopy(b, handle, overFlowAreaPtr, newBufPos);
    470     b->CreateBr(done);
    471     b->SetInsertPoint(done);
    472 }
    473 
    474415// Expandable Buffer
    475416
     
    707648    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))});
    708649    return b->CreateMul(b->CreateLoad(ptr), b->getSize(b->getBitBlockWidth()));
    709 }
    710 
    711 void DynamicBuffer::genCopyBackLogic(IDISA::IDISA_Builder * const b, Value * const handle, Value * priorProducedCount, Value * newProducedCount, const std::string Name) const {
    712     assert (priorProducedCount->getType() == newProducedCount->getType());   
    713     Value * workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(DynamicBuffer::Field::WorkingBlocks))}));
    714     assert (workingBlocks->getType() == newProducedCount->getType());
    715     Value * bufSize = b->CreateMul(workingBlocks, ConstantInt::get(workingBlocks->getType(), b->getBitBlockWidth()));
    716     Value * priorBufPos = b->CreateURem(priorProducedCount, bufSize);
    717     Value * newBufPos = b->CreateURem(newProducedCount, bufSize);
    718     BasicBlock * copyBack = b->CreateBasicBlock(Name + "_dynamicCopyBack");
    719     BasicBlock * done = b->CreateBasicBlock(Name + "_dynamicCopyBackDone");
    720 
    721     Value * wraparound = b->CreateICmpUGT(priorBufPos, newBufPos);
    722     b->CreateCondBr(wraparound, copyBack, done);
    723 
    724     b->SetInsertPoint(copyBack);
    725     Value * bufBasePtr = getBaseAddress(b, handle);
    726     Value * overFlowAreaPtr = b->CreateGEP(bufBasePtr, workingBlocks);
    727     createBlockAlignedCopy(b, bufBasePtr, overFlowAreaPtr, newBufPos);
    728     b->CreateBr(done);
    729 
    730     b->SetInsertPoint(done);
    731650}
    732651
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5782 r5793  
    8989    virtual llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPos, llvm::Value * avail, bool reverse = false) const;
    9090
    91     virtual llvm::Value * getLinearlyCopyableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPos, llvm::Value * avail, bool reverse = false) const {
    92         return getLinearlyAccessibleItems(b, handle, fromPos, avail, reverse);
    93     }
    94    
    9591    void createBlockCopy(IDISA::IDISA_Builder * const b, llvm::Value * targetBlockPtr, llvm::Value * sourceBlockPtr, llvm::Value * blocksToCopy) const;
    9692
     
    106102        return mOverflowBlocks;
    107103    }
    108 
    109     virtual void genCopyBackLogic(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * priorProduced, llvm::Value * newProduced, const std::string) const;
    110104   
    111105    virtual ~StreamSetBuffer() = 0;
     
    221215    llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * absolutePosition) const final;
    222216
    223     llvm::Value * getLinearlyCopyableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPos, llvm::Value * avail, bool reverse = false) const final;
    224 
    225217protected:
    226218
     
    243235   
    244236    CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace = 0);
    245    
    246     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    247    
    248     void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    249 
    250     void genCopyBackLogic(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * priorProduced, llvm::Value * newProduced, const std::string) const override;
     237       
     238    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    251239
    252240};
     
    260248    void createBlockAlignedCopy(IDISA::IDISA_Builder * const b, llvm::Value * targetBlockPtr, llvm::Value * sourceBlockPtr, llvm::Value * itemsToCopy, const unsigned alignment = 1) const override;
    261249
    262     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    263    
    264     void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    265 
    266     void genCopyBackLogic(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * priorProduced, llvm::Value * newProduced, const std::string) const override;
     250    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    267251
    268252protected:
     
    330314   
    331315    void doubleCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle);
    332 
    333     void genCopyBackLogic(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * priorProduced, llvm::Value * newProduced, const std::string) const override;
    334316
    335317protected:
Note: See TracChangeset for help on using the changeset viewer.