Timestamp: Dec 6, 2017, 5:41:27 PM (18 months ago)
Author: nmedfort
Message: Bug fixes + more assertions to prevent similar errors.
Location: icGREP/icgrep-devel/icgrep/kernels
Files: 10 edited

  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5755 r5757  
    6161    }
    6262
    63     bool notDeferred() const {
     63    bool nonDeferred() const {
    6464        return !hasAttribute(Attribute::KindId::Deferred);
    6565    }
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5755 r5757  
    514514
    515515/** ------------------------------------------------------------------------------------------------------------- *
     516 * @brief getLowerBound
     517 ** ------------------------------------------------------------------------------------------------------------- */
     518ProcessingRate::RateValue Kernel::getLowerBound(const ProcessingRate & rate) const {
     519    if (rate.isFixed() || rate.isBounded()) {
     520        return rate.getLowerBound();
     521    } else if (rate.isRelative()) {
     522        return rate.getRate() * getLowerBound(getBinding(rate.getReference()).getRate());
     523    } else { // if (rate.isUnknown())
     524        return 0;
     525    }
     526}
     527
     528/** ------------------------------------------------------------------------------------------------------------- *
     529 * @brief getUpperBound
     530 ** ------------------------------------------------------------------------------------------------------------- */
     531ProcessingRate::RateValue Kernel::getUpperBound(const ProcessingRate &rate) const {
     532    if (rate.isFixed() || rate.isBounded()) {
     533        return rate.getUpperBound();
     534    } else if (rate.isRelative()) {
     535        return rate.getRate() * getUpperBound(getBinding(rate.getReference()).getRate());
     536    } else { // if (rate.isUnknown())
     537        return 0;
     538    }
     539}
     540
     541/** ------------------------------------------------------------------------------------------------------------- *
    516542 * @brief normalizeRelativeToFixedProcessingRate
    517543 ** ------------------------------------------------------------------------------------------------------------- */
     
    567593 * @brief requiresBufferedFinalStride
    568594 ** ------------------------------------------------------------------------------------------------------------- */
    569 inline bool requiresBufferedFinalStride(const Binding & b) {
    570     if (LLVM_LIKELY(isa<ArrayType>(b.getType()))) {
    571         return b.getType()->getArrayNumElements() == 1;
     595inline bool requiresBufferedFinalStride(const Binding & binding) {
     596    if (LLVM_LIKELY(isa<ArrayType>(binding.getType()))) {
     597        return binding.getType()->getArrayNumElements() == 1;
    572598    }
    573599    return true;
     
    583609    }
    584610    return cast<IntegerType>(ty->getVectorElementType())->getBitWidth();
    585 }
    586 
    587 /** ------------------------------------------------------------------------------------------------------------- *
    588  * @brief getLowerBound
    589  ** ------------------------------------------------------------------------------------------------------------- */
    590 ProcessingRate::RateValue MultiBlockKernel::getLowerBound(const ProcessingRate & rate) const {
    591     if (rate.isFixed() || rate.isBounded()) {
    592         return rate.getLowerBound();
    593     } else if (rate.isRelative()) {
    594         return rate.getRate() * getLowerBound(getBinding(rate.getReference()).getRate());
    595     } else { // if (rate.isUnknown())
    596         return 0;
    597     }
    598 }
    599 
    600 /** ------------------------------------------------------------------------------------------------------------- *
    601  * @brief getUpperBound
    602  ** ------------------------------------------------------------------------------------------------------------- */
    603 ProcessingRate::RateValue MultiBlockKernel::getUpperBound(const ProcessingRate &rate) const {
    604     if (rate.isFixed() || rate.isBounded()) {
    605         return rate.getUpperBound();
    606     } else if (rate.isRelative()) {
    607         return rate.getRate() * getUpperBound(getBinding(rate.getReference()).getRate());
    608     } else { // if (rate.isUnknown())
    609         return 0;
    610     }
    611611}
    612612
     
    679679
    680680    // Define and allocate the temporary buffer area in the prolog.
    681     const auto alignment = b->getBitBlockWidth() / 8;
     681    const auto blockAlignment = b->getBitBlockWidth() / 8;
    682682    Value * temporaryInputBuffer[inputSetCount];
    683683    for (unsigned i = 0; i < inputSetCount; ++i) {
     
    693693            report_fatal_error("MultiBlock kernels do not support unknown rate input streams or streams relative to an unknown rate input.");
    694694        } else {           
    695             temporaryInputBuffer[i] = b->CreateAlignedAlloca(ty, alignment, b->getSize(roundUp(ub)));
     695            temporaryInputBuffer[i] = b->CreateAlignedAlloca(ty, blockAlignment, b->getSize(roundUp(ub)));
    696696            Type * const sty = temporaryInputBuffer[i]->getType()->getPointerElementType();
    697697            b->CreateStore(Constant::getNullValue(sty), temporaryInputBuffer[i]);
     
    710710                ub += mStreamSetOutputBuffers[i]->overflowSize();
    711711            }
    712             temporaryOutputBuffer[i] = b->CreateAlignedAlloca(ty, alignment, b->getSize(roundUp(ub)));
     712            temporaryOutputBuffer[i] = b->CreateAlignedAlloca(ty, blockAlignment, b->getSize(roundUp(ub)));
    713713            Type * const sty = temporaryOutputBuffer[i]->getType()->getPointerElementType();
    714714            b->CreateStore(Constant::getNullValue(sty), temporaryOutputBuffer[i]);
     
    754754        assert (ic->getType() == mAvailableItemCount[i]->getType());
    755755        Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], ic);
     756
    756757        mStreamSetInputBaseAddress[i]  = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH));
    757758        mInitialAvailableItemCount[i] = mAvailableItemCount[i];
    758759        mAvailableItemCount[i] = b->getLinearlyAccessibleItems(name, ic, unprocessed);
     760
    759761        // Are our linearly accessible items sufficient for a stride?
    760762        inputStrideSize[i] = getStrideSize(b, rate);
     763
    761764        Value * accessibleStrides = b->CreateUDiv(mAvailableItemCount[i], inputStrideSize[i]);
    762         if (!rate.isFixed() || requiresBufferedFinalStride(input)) {
     765        if (!rate.isFixed() || (requiresBufferedFinalStride(input) && input.nonDeferred())) {
    763766
    764767            // Since we trust that the pipeline won't call this kernel unless there is enough data to process a stride, whenever
     
    774777            b->SetInsertPoint(copyFromBack);
    775778            Value * const temporaryAvailable = b->CreateUMin(unprocessed, inputStrideSize[i]);
     779
    776780            b->CreateAssert(b->CreateICmpULE(mAvailableItemCount[i], temporaryAvailable), "linearly available cannot be greater than temporarily available");
    777781            Value * const tempBufferPtr = temporaryInputBuffer[i];
    778782            Value * const offset = b->CreateAnd(ic, BLOCK_WIDTH_MASK);
    779             const auto alignment = getItemAlignment(mStreamSetInputs[i]);
    780             b->CreateStreamCpy(name, tempBufferPtr, ZERO, mStreamSetInputBaseAddress[i] , offset, mAvailableItemCount[i], alignment);
     783            const auto copyAlignment = getItemAlignment(mStreamSetInputs[i]);
     784            b->CreateMemZero(tempBufferPtr, ConstantExpr::getSizeOf(tempBufferPtr->getType()), blockAlignment);
     785            b->CreateStreamCpy(name, tempBufferPtr, ZERO, mStreamSetInputBaseAddress[i] , offset, mAvailableItemCount[i], copyAlignment);
    781786            Value * const temporaryStrides = b->CreateSelect(b->CreateICmpULT(unprocessed, inputStrideSize[i]), ZERO, ONE);
    782787            BasicBlock * const copyToBackEnd = b->GetInsertBlock();
     
    786791            Value * const remaining = b->CreateSub(temporaryAvailable, mAvailableItemCount[i]);
    787792            Value * const baseAddress = b->getBaseAddress(name);
    788             b->CreateStreamCpy(name, tempBufferPtr, mAvailableItemCount[i], baseAddress, ZERO, remaining, alignment);
     793            b->CreateStreamCpy(name, tempBufferPtr, mAvailableItemCount[i], baseAddress, ZERO, remaining, copyAlignment);
    789794            BasicBlock * const copyToFrontEnd = b->GetInsertBlock();
    790795            b->CreateBr(resume);
     
    844849    }
    845850
     851    BasicBlock * const segmentDone = b->CreateBasicBlock("SegmentDone");
     852
    846853    Value * const initiallyFinal = mIsFinal;
    847854    if (LLVM_LIKELY(numOfStrides != nullptr)) {
    848855        mIsFinal = b->CreateAnd(mIsFinal, b->CreateICmpEQ(numOfStrides, ZERO));
    849         Value * const processStride = b->CreateOr(b->CreateICmpNE(numOfStrides, ZERO), mIsFinal);
    850         b->CreateAssert(processStride, getName() + " does not have sufficient input data or output space for one stride");
     856        Value * const hasStride = b->CreateOr(b->CreateICmpNE(numOfStrides, ZERO), mIsFinal);
     857        b->CreateAssert(hasStride, getName() + " has insufficient input data or output space for one stride");
    851858        for (unsigned i = 0; i < inputSetCount; ++i) {
    852859            const ProcessingRate & rate = mStreamSetInputs[i].getRate();
    853             if (rate.isFixed() && mStreamSetInputs[i].notDeferred()) {
     860            if (rate.isFixed() && mStreamSetInputs[i].nonDeferred()) {
    854861                mAvailableItemCount[i] = b->CreateSelect(mIsFinal, mAvailableItemCount[i], b->CreateMul(numOfStrides, inputStrideSize[i]));
    855862            }
     
    862869    for (unsigned i = 0; i < inputSetCount; ++i) {
    863870        const ProcessingRate & rate = mStreamSetInputs[i].getRate();
    864         if (rate.isFixed() && mStreamSetInputs[i].notDeferred()) {
     871        if (rate.isFixed() && mStreamSetInputs[i].nonDeferred()) {
    865872            Value * const ic = b->CreateAdd(mInitialProcessedItemCount[i], mAvailableItemCount[i]);
    866873            b->setProcessedItemCount(mStreamSetInputs[i].getName(), ic);
     
    871878        const ProcessingRate & rate = mStreamSetOutputs[i].getRate();
    872879        if (rate.isFixed()) {
    873             assert (mStreamSetOutputs[i].notDeferred());
     880            assert (mStreamSetOutputs[i].nonDeferred());
    874881            Value * const produced = b->CreateMul(numOfStrides, outputStrideSize[i]);
    875882            Value * const ic = b->CreateAdd(mInitialProducedItemCount[i], produced);
     
    882889    BasicBlock * const strideDone = b->CreateBasicBlock("MultiBlockDone");
    883890
    884     b->CreateLikelyCondBr(b->CreateICmpNE(numOfStrides, ZERO), temporaryBufferCopyBack, handleFinalBlock);
     891    b->CreateUnlikelyCondBr(mIsFinal, handleFinalBlock, temporaryBufferCopyBack);
    885892
    886893
     
    906913        }
    907914        Value * const baseBuffer = baseOutputBuffer[i];
    908         assert ("stack overflow" && (tempBuffer->getType() == baseBuffer->getType()));
     915        assert ("stack corruption likely" && (tempBuffer->getType() == baseBuffer->getType()));
    909916        const auto & name = mStreamSetOutputs[i].getName();
    910917        BasicBlock * const copyToBack = b->CreateBasicBlock(name + "CopyToBack");
    911918        BasicBlock * const copyToFront = b->CreateBasicBlock(name + "CopyToFront");
     919        BasicBlock * const clearBuffer = b->CreateBasicBlock(name + "ClearBuffer");
    912920        BasicBlock * const resume = b->CreateBasicBlock(name + "ResumeCopyBack");
    913921        // If we used a temporary buffer, copy it back to the original output buffer
     
    922930        b->CreateStreamCpy(name, baseBuffer, offset, tempBuffer, ZERO, toWrite, alignment);
    923931        // If we required a temporary output buffer, we will probably need to write to the beginning of the buffer as well.
    924         b->CreateLikelyCondBr(b->CreateICmpULT(toWrite, newlyProduced), copyToFront, resume);
     932        b->CreateLikelyCondBr(b->CreateICmpULT(toWrite, newlyProduced), copyToFront, clearBuffer);
    925933
    926934        b->SetInsertPoint(copyToFront);
     
    928936        Value * const baseAddress = b->getBaseAddress(name);
    929937        b->CreateStreamCpy(name, baseAddress, ZERO, tempBuffer, toWrite, remaining, alignment);
     938        b->CreateBr(clearBuffer);
     939        // Clear the buffer after use since we may end up reusing it within the same stride
     940        b->SetInsertPoint(clearBuffer);
     941
    930942        b->CreateBr(resume);
    931943
     
    933945    }
    934946
    935     strideDone->moveAfter(b->GetInsertBlock());
    936 
    937     BasicBlock * const segmentDone = b->CreateBasicBlock("SegmentDone");
    938947    //  We've dealt with the partial block processing and copied information back into the
    939948    //  actual buffers.  If this isn't the final block, loop back for more multiblock processing.
     
    950959
    951960    /// STRIDE DONE
     961    strideDone->moveAfter(b->GetInsertBlock());
    952962    b->SetInsertPoint(strideDone);
    953963
     964    b->CreateAssertZero(mIsFinal, "stride done cannot process the final block");
     965
    954966    // do we have enough data for another stride?
    955     Value * pendingStrides = nullptr;
     967    Value * hasMoreStrides = b->getTrue();
    956968    for (unsigned i = 0; i < inputSetCount; ++i) {
    957         Value * const processed = b->getProcessedItemCount(mStreamSetInputs[i].getName());
    958         Value * const remaining = b->CreateSub(mInitialAvailableItemCount[i], processed);
     969        const auto & name = mStreamSetInputs[i].getName();
     970        Value * const avail = mInitialAvailableItemCount[i];
     971        Value * const processed = b->getProcessedItemCount(name);
     972        b->CreateAssert(b->CreateICmpULE(processed, avail), name + ": processed data cannot exceed available data");
     973        Value * const remaining = b->CreateSub(avail, processed);
    959974        Value * const remainingStrides = b->CreateUDiv(remaining, inputStrideSize[i]);
    960         pendingStrides = b->CreateUMin(pendingStrides, remainingStrides);
    961     }
     975        hasMoreStrides = b->CreateAnd(hasMoreStrides, b->CreateICmpNE(remainingStrides, ZERO));
     976    }
     977    // even if we do not have enough input data for a full stride, if this is our final stride, allow it ...
     978    hasMoreStrides = b->CreateOr(hasMoreStrides, initiallyFinal);
    962979
    963980    // do we have enough room for another stride?
     
    965982        const ProcessingRate & rate = mStreamSetOutputs[i].getRate();
    966983        const auto & name = mStreamSetOutputs[i].getName();
    967         Value * const newProduced = b->getProducedItemCount(name);
     984        Value * const produced = b->getProducedItemCount(name);
    968985        // If this output has a Fixed/Bounded rate, determine whether we have room for another stride.
    969986        if (LLVM_LIKELY(outputStrideSize[i] != nullptr)) {
    970             Value * const unconsumed = b->CreateSub(newProduced, b->getConsumedItemCount(name));
    971             Value * const remaining = b->CreateSub(b->getCapacity(name), unconsumed);
     987            Value * const consumed = b->getConsumedItemCount(name);
     988            b->CreateAssert(b->CreateICmpULE(consumed, produced), name + ": consumed data cannot exceed produced data");
     989            Value * const unconsumed = b->CreateSub(produced, consumed);
     990            Value * const capacity = b->getCapacity(name);
     991            b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), name + ": unconsumed data cannot exceed capacity");
     992            Value * const remaining = b->CreateSub(capacity, unconsumed);
    972993            Value * const remainingStrides = b->CreateUDiv(remaining, outputStrideSize[i]);
    973             pendingStrides = b->CreateUMin(pendingStrides, remainingStrides);
     994            hasMoreStrides = b->CreateAnd(hasMoreStrides, b->CreateICmpNE(remainingStrides, ZERO));
    974995        }
    975996        // Do copybacks if necessary.
    976997        if (mStreamSetOutputBuffers[i]->supportsCopyBack() && requiresCopyBack(rate)) {
    977             b->CreateCopyBack(name, mInitialProducedItemCount[i], newProduced);
    978         }
    979     }
    980 
    981     Value * const hasMoreStrides = b->CreateOr(b->CreateICmpNE(pendingStrides, ZERO), initiallyFinal);
     998            b->CreateCopyBack(name, mInitialProducedItemCount[i], produced);
     999        }
     1000    }
    9821001    b->CreateCondBr(hasMoreStrides, segmentLoop, segmentDone);
    9831002
     
    11031122        const ProcessingRate & pr = output.getRate();
    11041123        Value * produced = nullptr;
    1105         if (pr.isFixed() && output.notDeferred()) {
     1124        if (pr.isFixed() && output.nonDeferred()) {
    11061125            assert (baseInitialProcessedItemCount && scaledInverseOfAvailItemCount);
    11071126            const auto rate = pr.getRate();
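
    The core change in kernel.cpp is the end-of-segment test: rather than folding pending input and output strides into a single UMin and comparing it to zero, r5757 builds hasMoreStrides as a conjunction of per-stream checks, guarded by the new assertions. A minimal host-side sketch of that logic (plain C++ stand-ins for the generated IR; the struct names and fields below are illustrative, stride sizes are assumed non-zero, and only Fixed/Bounded-rate outputs would carry a stride size):

        #include <cassert>
        #include <cstdint>
        #include <vector>

        struct InputState  { uint64_t available, processed, strideSize; };
        struct OutputState { uint64_t produced, consumed, capacity, strideSize; };

        // Mirrors the r5757 control flow: AND over inputs, OR in the final-segment
        // flag, then AND over bounded-rate outputs. The assert messages correspond
        // to the assertions added in this changeset.
        static bool hasMoreStrides(const std::vector<InputState> & inputs,
                                   const std::vector<OutputState> & outputs,
                                   const bool initiallyFinal) {
            bool more = true;
            for (const auto & in : inputs) {
                assert (in.processed <= in.available && "processed data cannot exceed available data");
                const uint64_t remaining = in.available - in.processed;
                more = more && (remaining / in.strideSize != 0);
            }
            // even without a full stride of input, the final segment is still allowed to run
            more = more || initiallyFinal;
            for (const auto & out : outputs) {
                assert (out.consumed <= out.produced && "consumed data cannot exceed produced data");
                const uint64_t unconsumed = out.produced - out.consumed;
                assert (unconsumed <= out.capacity && "unconsumed data cannot exceed capacity");
                const uint64_t remaining = out.capacity - unconsumed;
                more = more && (remaining / out.strideSize != 0);
            }
            return more;
        }

    Relative to the old UMin-based form, this keeps each stream's check separate, which is what makes the added per-stream assertions possible.
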
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5755 r5757  
    117117
    118118    const Binding & getBinding(const std::string & name) const;
     119
     120    ProcessingRate::RateValue getLowerBound(const ProcessingRate &rate) const;
     121
     122    ProcessingRate::RateValue getUpperBound(const ProcessingRate & rate) const;
    119123
    120124    const StreamSetBuffers & getStreamSetInputBuffers() const {
     
    431435    unsigned getItemAlignment(const Binding & binding) const;
    432436
    433     ProcessingRate::RateValue getLowerBound(const ProcessingRate &rate) const;
    434 
    435     ProcessingRate::RateValue getUpperBound(const ProcessingRate & rate) const;
    436 
    437437    bool isTransitivelyUnknownRate(const ProcessingRate & rate) const;
    438438
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5755 r5757  
    169169 ** ------------------------------------------------------------------------------------------------------------- */
    170170inline unsigned getFieldWidth(const unsigned bitWidth, const unsigned blockWidth) {
    171     for (unsigned k = 16; k < blockWidth; k *= 2) {
     171    for (unsigned k = 16; k <= blockWidth; k *= 2) {
    172172        if ((bitWidth & (k - 1)) != 0) {
    173173            return k / 2;
     
    186186    assert (target->getType() == source->getType());
    187187    assert (target->getType()->isPointerTy());
     188    assert (isConstantZero(targetOffset) || isConstantZero(sourceOffset));
    188189
    189190    const StreamSetBuffer * const buf = mKernel->getAnyStreamSetBuffer(name);
    190 
    191191    const auto itemWidth = getItemWidth(buf->getBaseType());
    192192    assert ("invalid item width" && is_power_2(itemWidth));
    193193    const auto blockWidth = getBitBlockWidth();
    194 
     194    // Although our item width may be n bits, if we know we're always processing m items per block, our field width
     195    // (w.r.t the stream copy) would be n*m. By taking this into account we can optimize and simplify the copy code.
    195196    const auto fieldWidth = getFieldWidth(itemWidth * itemAlignment, blockWidth);
    196     assert ("overflow error" && is_power_2(fieldWidth) && (itemWidth <= fieldWidth));
    197 
    198     assert (isConstantZero(targetOffset) || isConstantZero(sourceOffset));
    199 
    200     IntegerType * const fieldWidthTy = getIntNTy(fieldWidth / 8);
    201 
    202     const auto alignment = fieldWidth / 8;
     197    assert ("overflow error" && is_power_2(fieldWidth) && (itemWidth <= fieldWidth) && (fieldWidth <= blockWidth));
    203198
    204199    if (LLVM_LIKELY(itemWidth < fieldWidth)) {
     
    211206
    212207    /*
    213 
    214208       Streams are conceptually modelled as:
    215209
     
    232226    */
    233227
     228    const auto alignment = (fieldWidth + 7) / 8;
     229
     230    Type * const fieldWidthTy = getIntNTy(fieldWidth);
     231
    234232    Value * const n = buf->getStreamSetCount(this, getStreamHandle(name));
    235     if (fieldWidth == blockWidth || isConstantOne(n) || (isConstantZero(targetOffset) && isConstantZero(sourceOffset))) {
    236         PointerType * const fieldWidthPtrTy = fieldWidthTy->getPointerTo();
     233    if (isConstantOne(n) || fieldWidth == blockWidth || (isConstantZero(targetOffset) && isConstantZero(sourceOffset))) {
    237234        if (isConstantOne(n)) {
    238235            if (LLVM_LIKELY(itemWidth < 8)) {
     
    242239            }
    243240        } else {
    244             itemsToCopy = CreateMul(CreateUDivCeil(itemsToCopy, getSize(blockWidth / (8 * itemWidth))), n);
    245         }
    246         target = CreateGEP(CreatePointerCast(target, fieldWidthPtrTy), targetOffset);
    247         source = CreateGEP(CreatePointerCast(source, fieldWidthPtrTy), sourceOffset);
     241            if (LLVM_LIKELY(blockWidth > (itemWidth * 8))) {
     242                itemsToCopy = CreateUDivCeil(itemsToCopy, getSize(blockWidth / (8 * itemWidth)));
     243            } else if (LLVM_LIKELY(blockWidth < (itemWidth * 8))) {
     244                itemsToCopy = CreateUDivCeil(CreateMul(itemsToCopy, getSize(8)), getSize(blockWidth / itemWidth));
     245            }
     246            itemsToCopy = CreateMul(itemsToCopy, n);
     247        }
     248        PointerType * const ptrTy = fieldWidthTy->getPointerTo();
     249        target = CreateGEP(CreatePointerCast(target, ptrTy), targetOffset);
     250        source = CreateGEP(CreatePointerCast(source, ptrTy), sourceOffset);
    248251        CreateMemCpy(target, source, itemsToCopy, alignment);
    249252
     
    255258        target = CreatePointerCast(target, blockPtrTy);
    256259        source = CreatePointerCast(source, blockPtrTy);
     260
     261        assert ((blockWidth % fieldWidth) == 0);
    257262
    258263        VectorType * const shiftTy = VectorType::get(fieldWidthTy, blockWidth / fieldWidth);
    259264        Constant * const width = getSize(blockWidth / itemWidth);
    260265        BasicBlock * const entry = GetInsertBlock();
    261 
    262266
    263267        if (isConstantZero(targetOffset)) {
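
    The getFieldWidth fix changes the loop bound from k < blockWidth to k <= blockWidth, so a misalignment that only shows up at k == blockWidth is no longer missed; the strengthened assert (fieldWidth <= blockWidth) guards the same invariant. A standalone sketch with a worked example (the return blockWidth fallback after the loop is an assumption based on the surrounding context, since that part of the function lies outside the hunk):

        #include <cassert>

        inline unsigned getFieldWidth(const unsigned bitWidth, const unsigned blockWidth) {
            for (unsigned k = 16; k <= blockWidth; k *= 2) {   // r5757: was k < blockWidth
                if ((bitWidth & (k - 1)) != 0) {
                    return k / 2;
                }
            }
            return blockWidth; // assumed fallback when every tested power of two divides bitWidth
        }

        int main() {
            // blockWidth = 256, bitWidth = 128: the old bound never tested k = 256,
            // so 128 & 255 == 128 went unnoticed and the copy width was over-reported
            // as 256; with k <= blockWidth the function now returns 128.
            assert (getFieldWidth(128, 256) == 128);
            assert (getFieldWidth(8, 256) == 8);      // 8 & 15 != 0 on the first test
            assert (getFieldWidth(256, 256) == 256);  // evenly divisible at every step
            return 0;
        }

    As the new comment in the hunk notes, the bitWidth passed in is itemWidth * itemAlignment, so a copy that always moves a whole number of items per block can use a wider field and the simpler memcpy path.
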
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5755 r5757  
    161161            Value * ptr = iBuilder->getInputStreamPackPtr("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    162162            // CreateLoad defaults to aligned here, so we need to force the alignment to 1 byte.
    163             bytepack[i] = iBuilder->CreateAlignedLoad(ptr, 1);
     163            bytepack[i] = iBuilder->CreateAlignedLoad(ptr, 1);           
    164164        }
    165165    }
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5755 r5757  
    4242    const unsigned fieldCount = iBuilder->getBitBlockWidth() / sizeTy->getBitWidth();
    4343    VectorType * const scanwordVectorType =  VectorType::get(sizeTy, fieldCount);
     44    Constant * const ZERO = ConstantInt::getNullValue(sizeTy);
    4445
    4546    Value * match_result = iBuilder->getInputStreamBlockPtr("matchResult", iBuilder->getInt32(0));
     
    101102        phiRecordNum->addIncoming(phiLineNum, scanWordIteration);
    102103        phiRecordStart->addIncoming(phiLineStart, scanWordIteration);
    103         Value * anyMatches = iBuilder->CreateICmpNE(phiMatchWord, ConstantInt::getNullValue(sizeTy));
     104        Value * anyMatches = iBuilder->CreateICmpNE(phiMatchWord, ZERO);
    104105        iBuilder->CreateCondBr(anyMatches, processMatchesEntry, processMatchesExit);
    105106
     
    109110            Value * prior_breaks = iBuilder->CreateAnd(iBuilder->CreateMaskToLowestBitExclusive(phiMatchWord), phiRecordBreaks);
    110111            // Within the loop we have a conditional block that is executed if there are any prior record breaks.
    111             Value * prior_breaks_cond = iBuilder->CreateICmpNE(prior_breaks, ConstantInt::getNullValue(sizeTy));
     112            Value * prior_breaks_cond = iBuilder->CreateICmpNE(prior_breaks, ZERO);
    112113            iBuilder->CreateCondBr(prior_breaks_cond, prior_breaks_block, loop_final_block);
    113114
     
    116117                iBuilder->SetInsertPoint(prior_breaks_block);
    117118                Value * matchedRecordNum = iBuilder->CreateAdd(iBuilder->CreatePopcount(prior_breaks), phiRecordNum);
    118                 Value * reverseDistance = iBuilder->CreateCountReverseZeroes(prior_breaks);
     119                Value * reverseDistance = iBuilder->CreateCountReverseZeroes(prior_breaks, true);
    119120                Value * width = ConstantInt::get(sizeTy, sizeTy->getBitWidth());
    120                 Value * priorRecordStart = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateSub(width, reverseDistance));
     121                Value * priorRecordStart = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateSub(width, reverseDistance));               
    121122                iBuilder->CreateBr(loop_final_block);
    122123
     
    130131            phiRecordNum->addIncoming(matchRecordNum, loop_final_block);
    131132
    132             PHINode * matchRecordStart = iBuilder->CreatePHI(sizeTy, 2, "matchRecordStart");
     133            PHINode * const matchRecordStart = iBuilder->CreatePHI(sizeTy, 2, "matchRecordStart");
    133134            matchRecordStart->addIncoming(phiRecordStart, processMatchesEntry);
    134135            matchRecordStart->addIncoming(priorRecordStart, prior_breaks_block);
    135136            phiRecordStart->addIncoming(matchRecordStart, loop_final_block);
    136             Value * matchRecordEnd = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateCountForwardZeroes(phiMatchWord));
    137             Value * const inputStream = iBuilder->getRawInputPointer("InputStream", iBuilder->getInt32(0));
    138             Function * dispatcher = m->getFunction("accumulate_match_wrapper"); assert (dispatcher);
    139             Value * start_ptr = iBuilder->CreateGEP(inputStream, matchRecordStart);
    140             Value * end_ptr = iBuilder->CreateGEP(inputStream, matchRecordEnd);
    141             iBuilder->CreateCall(dispatcher, {accumulator, matchRecordNum, start_ptr, end_ptr});
     137            Value * const matchRecordEnd = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateCountForwardZeroes(phiMatchWord, true));
     138            Function * const dispatcher = m->getFunction("accumulate_match_wrapper"); assert (dispatcher);
     139            Value * const startPtr = iBuilder->getRawInputPointer("InputStream", matchRecordStart);
     140            Value * const endPtr = iBuilder->getRawInputPointer("InputStream", matchRecordEnd);
     141            iBuilder->CreateCall(dispatcher, {accumulator, matchRecordNum, startPtr, endPtr});
    142142            Value * remaining_matches = iBuilder->CreateResetLowestBit(phiMatchWord);
    143143            phiMatchWord->addIncoming(remaining_matches, loop_final_block);
     
    151151        iBuilder->SetInsertPoint(processMatchesExit);
    152152        // When the matches are done, there may be additional record breaks remaining
    153         Value * more_breaks_cond = iBuilder->CreateICmpNE(phiRecordBreaks, ConstantInt::getNullValue(sizeTy));
     153        Value * more_breaks_cond = iBuilder->CreateICmpNE(phiRecordBreaks, ZERO);
    154154        iBuilder->CreateCondBr(more_breaks_cond, remaining_breaks_block, return_block);
    155155
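
    Besides hoisting the ZERO constant and switching the match dispatch to getRawInputPointer, the notable change here is the extra boolean passed to CreateCountReverseZeroes and CreateCountForwardZeroes. Reading it as a "value is known to be non-zero" hint (an assumption about the builder API, but consistent with both call sites, which are dominated by the prior_breaks != 0 and anyMatches checks), the scalar equivalent is sketched below:

        #include <cstdint>

        // countReverseZeroes(x, true) may skip the zero guard entirely, matching the
        // semantics of LLVM's ctlz intrinsic when its is_zero_undef operand is true.
        inline unsigned countReverseZeroes(const uint64_t x, const bool guaranteedNonZero = false) {
            if (!guaranteedNonZero && x == 0) {
                return 64;                                      // well-defined result for a zero input
            }
            return static_cast<unsigned>(__builtin_clzll(x));   // x is non-zero on this path
        }

        inline unsigned countForwardZeroes(const uint64_t x, const bool guaranteedNonZero = false) {
            if (!guaranteedNonZero && x == 0) {
                return 64;
            }
            return static_cast<unsigned>(__builtin_ctzll(x));   // x is non-zero on this path
        }

    Dropping the guard lets the backend lower the count directly without a zero-check branch.
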
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.cpp

    r5755 r5757  
    1212using namespace llvm;
    1313
    14 inline static size_t round_up_to_nearest(const size_t x, const size_t y) {
    15     return (((x - 1) | (y - 1)) + 1);
    16 }
    17 
    1814uint64_t file_size(const uint32_t fd) {
    1915    struct stat st;
     
    3228}
    3329
    34 void MMapSourceKernel::generateInitializeMethod(Function * const fileSizeMethod, const unsigned codeUnitWidth, const std::unique_ptr<KernelBuilder> & kb) {
     30void MMapSourceKernel::generateInitializeMethod(Function * const fileSizeMethod, const unsigned codeUnitWidth, const unsigned /* blocksRequiredPerSegment */, const std::unique_ptr<KernelBuilder> & kb) {
    3531    BasicBlock * const emptyFile = kb->CreateBasicBlock("EmptyFile");
    3632    BasicBlock * const nonEmptyFile = kb->CreateBasicBlock("NonEmptyFile");
     
    6359    size->addIncoming(pageSize, emptyFile);
    6460    size->addIncoming(fileSize, nonEmptyFile);
    65     Value * bufferPtr = kb->CreatePointerCast(buffer, kb->getInt8PtrTy());
     61
     62    PointerType * const codeUnitPtrTy = kb->getIntNTy(codeUnitWidth)->getPointerTo();
     63    Value * bufferPtr = kb->CreatePointerCast(buffer, codeUnitPtrTy);
    6664    kb->setBaseAddress("sourceBuffer", bufferPtr);
    6765    kb->setBufferedSize("sourceBuffer", size);
     
    7371}
    7472
    75 void MMapSourceKernel::generateDoSegmentMethod(const unsigned codeUnitWidth, const unsigned segmentBlocks, const std::unique_ptr<KernelBuilder> & kb) {
     73void MMapSourceKernel::generateDoSegmentMethod(const unsigned codeUnitWidth, const unsigned blocksRequiredPerSegment, const std::unique_ptr<KernelBuilder> & kb) {
    7674
    7775    BasicBlock * dropPages = kb->CreateBasicBlock("dropPages");
     
    8078    BasicBlock * mmapSourceExit = kb->CreateBasicBlock("mmapSourceExit");
    8179
     80    Constant * const segmentSize = kb->getSize(blocksRequiredPerSegment * kb->getBitBlockWidth());
     81    Constant * const pageSize = kb->getSize(getpagesize());
     82
     83    Value * consumed = kb->getConsumedItemCount("sourceBuffer");
     84    consumed = kb->CreateMul(consumed, kb->getSize(codeUnitWidth / 8));
     85    consumed = kb->CreateAnd(consumed, ConstantExpr::getNeg(pageSize));
     86
     87    Value * const consumedBuffer = kb->getRawOutputPointer("sourceBuffer", consumed);
     88    Value * const readableBuffer = kb->getScalarField("readableBuffer");
     89    Value * const unnecessaryBytes = kb->CreatePtrDiff(consumedBuffer, readableBuffer);
     90
     91    // avoid calling madvise unless an actual page table change could occur
     92    kb->CreateLikelyCondBr(kb->CreateIsNotNull(unnecessaryBytes), processSegment, dropPages);
     93
     94    kb->SetInsertPoint(dropPages);
    8295    // instruct the OS that it can safely drop any fully consumed pages
    83     Value * consumed = kb->getConsumedItemCount("sourceBuffer");
    84     IntegerType * const consumedTy = cast<IntegerType>(consumed->getType());
    85     Type * const int8PtrTy = kb->getInt8PtrTy();
    86 
    87     DataLayout DL(kb->getModule());
    88     IntegerType * const intAddrTy = kb->getIntPtrTy(DL);
    89 
    90     // multiply the consumed count by the code unit size then mask off any partial pages
    91     if (codeUnitWidth > 8) {
    92         consumed = kb->CreateMul(consumed, ConstantInt::get(consumedTy, codeUnitWidth / 8));
    93     }
    94     const auto pageSize = getpagesize();
    95     if (LLVM_LIKELY((pageSize & (pageSize - 1)) == 0)) {
    96         consumed = kb->CreateAnd(consumed, ConstantExpr::getNeg(ConstantInt::get(consumedTy, pageSize)));
    97     } else {
    98         consumed = kb->CreateSub(consumed, kb->CreateURem(consumed, ConstantInt::get(consumedTy, pageSize)));
    99     }
    100 
    101     Value * sourceBuffer = kb->getBaseAddress("sourceBuffer");
    102     sourceBuffer = kb->CreatePtrToInt(sourceBuffer, intAddrTy);
    103     if (LLVM_UNLIKELY(intAddrTy->getBitWidth() > consumedTy->getBitWidth())) {
    104         consumed = kb->CreateZExt(consumed, intAddrTy);
    105     } else if (LLVM_UNLIKELY(intAddrTy->getBitWidth() < consumedTy->getBitWidth())) {
    106         sourceBuffer = kb->CreateZExt(sourceBuffer, consumedTy);
    107     }
    108     Value * consumedBuffer = kb->CreateAdd(sourceBuffer, consumed);
    109     Value * readableBuffer = kb->getScalarField("readableBuffer");
    110     readableBuffer = kb->CreatePtrToInt(readableBuffer, consumedBuffer->getType());
    111     Value * unnecessaryBytes = kb->CreateSub(consumedBuffer, readableBuffer);
    112 
    113     // avoid calling madvise unless an actual page table change could occur
    114     Value * hasPagesToDrop = kb->CreateICmpEQ(unnecessaryBytes, ConstantInt::getNullValue(intAddrTy));
    115     kb->CreateLikelyCondBr(hasPagesToDrop, processSegment, dropPages);
    116 
    117     kb->SetInsertPoint(dropPages);
    118     kb->CreateMAdvise(kb->CreateIntToPtr(readableBuffer, int8PtrTy), unnecessaryBytes, CBuilder::ADVICE_DONTNEED);
    119     readableBuffer = kb->CreateIntToPtr(kb->CreateAdd(readableBuffer, unnecessaryBytes), int8PtrTy);
    120     kb->setScalarField("readableBuffer", readableBuffer);
     96    kb->CreateMAdvise(readableBuffer, unnecessaryBytes, CBuilder::ADVICE_DONTNEED);
     97    kb->setScalarField("readableBuffer", kb->CreateGEP(readableBuffer, unnecessaryBytes));
    12198    kb->CreateBr(processSegment);
    12299
    123100    // determine whether or not we've exhausted the file buffer
    124101    kb->SetInsertPoint(processSegment);
    125     ConstantInt * segmentItems = kb->getSize(segmentBlocks * kb->getBitBlockWidth());
    126102    Value * const fileSize = kb->getScalarField("fileSize");
    127     Value * const produced = kb->CreateAdd(kb->getProducedItemCount("sourceBuffer"), segmentItems);
     103    Value * const produced = kb->CreateAdd(kb->getProducedItemCount("sourceBuffer"), segmentSize);
    128104    Value * const lessThanFullSegment = kb->CreateICmpULT(fileSize, produced);
    129105    kb->CreateUnlikelyCondBr(lessThanFullSegment, setTermination, mmapSourceExit);
     
    146122}
    147123
    148 MMapSourceKernel::MMapSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, unsigned blocksPerSegment, unsigned codeUnitWidth)
    149 : SegmentOrientedKernel("mmap_source" + std::to_string(blocksPerSegment) + "@" + std::to_string(codeUnitWidth),
     124MMapSourceKernel::MMapSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, unsigned blocksRequiredPerSegment, unsigned codeUnitWidth)
     125: SegmentOrientedKernel("mmap_source" + std::to_string(blocksRequiredPerSegment) + "@" + std::to_string(codeUnitWidth),
    150126{},
    151 {Binding{kb->getStreamSetTy(1, codeUnitWidth), "sourceBuffer"}},
     127{Binding{kb->getStreamSetTy(1, codeUnitWidth), "sourceBuffer", FixedRate(), Deferred()}},
    152128{Binding{kb->getInt32Ty(), "fileDescriptor"}},
    153 {Binding{kb->getSizeTy(), "fileSize"}}, {Binding{kb->getInt8PtrTy(), "readableBuffer"}})
    154 , mSegmentBlocks(blocksPerSegment)
     129{Binding{kb->getSizeTy(), "fileSize"}}, {Binding{kb->getIntNTy(codeUnitWidth)->getPointerTo(), "readableBuffer"}})
     130, mBlocksRequiredPerSegment(blocksRequiredPerSegment)
    155131, mCodeUnitWidth(codeUnitWidth)
    156132, mFileSizeFunction(nullptr) {
     
    160136/// READ SOURCE KERNEL
    161137
    162 void ReadSourceKernel::generateInitializeMethod(const unsigned codeUnitWidth, const std::unique_ptr<KernelBuilder> & kb) {
    163     const size_t initialBufferSize = 8 * getpagesize() * codeUnitWidth;
    164     ConstantInt * const bufferBytes = kb->getSize(initialBufferSize * codeUnitWidth / 8);
    165     PointerType * const codeUnitPtrTy = IntegerType::get(kb->getContext(), codeUnitWidth)->getPointerTo();
    166     Value * const buffer = kb->CreatePointerCast(kb->CreateCacheAlignedMalloc(bufferBytes), codeUnitPtrTy);
    167     kb->setScalarField("buffer", buffer);
    168     kb->setScalarField("capacity", kb->getSize(initialBufferSize));
    169     kb->setBaseAddress("sourceBuffer", buffer);
    170     kb->setBufferedSize("sourceBuffer", kb->getSize(0));
    171     kb->setCapacity("sourceBuffer", kb->getSize(initialBufferSize));
    172 }
    173 
    174 void ReadSourceKernel::generateDoSegmentMethod(const unsigned codeUnitWidth, const unsigned segmentBlocks, const std::unique_ptr<KernelBuilder> & kb) {
    175 
    176     ConstantInt * const readSize = kb->getSize(getpagesize() * 8/codeUnitWidth);
    177     PointerType * const codeUnitPtrTy = IntegerType::get(kb->getContext(), codeUnitWidth)->getPointerTo();
    178     PointerType * const i8PtrTy = IntegerType::get(kb->getContext(), 8)->getPointerTo();
    179     ConstantInt * const codeUnitBytes = kb->getSize(codeUnitWidth/8);
    180     BasicBlock * const entryBlock = kb->GetInsertBlock();
    181     BasicBlock * const exhaustedBuffer = kb->CreateBasicBlock("ExhaustedBuffer");
    182     BasicBlock * const waitOnConsumers = kb->CreateBasicBlock("WaitOnConsumers");
    183     BasicBlock * const readData = kb->CreateBasicBlock("ReadData");
    184     BasicBlock * const stdInExit = kb->CreateBasicBlock("StdInExit");
    185 
    186     // Check whether we need to read another page of data
    187     ConstantInt * const segmentSize = kb->getSize(segmentBlocks * kb->getBitBlockWidth());
    188     Value * bufferedSize = kb->getBufferedSize("sourceBuffer");
    189     Value * const produced = kb->getProducedItemCount("sourceBuffer");
    190     Value * unreadSize = kb->CreateSub(bufferedSize, produced);
    191 
    192     kb->CreateUnlikelyCondBr(kb->CreateICmpULT(unreadSize, segmentSize), exhaustedBuffer, stdInExit);
    193 
    194     // If so, it checks whether it can simply append another page to the existing buffer or whether
    195     // we need to perform a copyback.
    196 
    197     kb->SetInsertPoint(exhaustedBuffer);
    198 
    199     // Otherwise, we're going to have to perform a copy back...
    200 
    201     // Let L be the logical buffer address (i.e., the position of the "first code unit" of the input stream)
    202     // and B be the address pointing to the beginning of our actual buffer. Check whether:
    203 
    204     //     L + produced + readSize < B + capacity
    205 
    206     // If so, we can append to our existing buffer without impacting any subsequent kernel.
    207 
    208     Value * inputStream = kb->getRawOutputPointer("sourceBuffer", kb->getInt32(0));
    209     Value * const originalPtr = kb->CreateGEP(inputStream, produced);
    210 
    211     Value * const buffer = kb->getScalarField("buffer");
    212     Value * const capacity = kb->getScalarField("capacity");
    213 
    214     Value * L = kb->CreateGEP(originalPtr, readSize);
    215     Value * B = kb->CreateGEP(buffer, capacity);
    216     Value * const canAppend = kb->CreateICmpULT(L, B);
    217 
    218     kb->CreateLikelyCondBr(canAppend, readData, waitOnConsumers);
     138void ReadSourceKernel::generateInitializeMethod(const unsigned codeUnitWidth, const unsigned blocksRequiredPerSegment, const std::unique_ptr<KernelBuilder> & b) {
     139    const unsigned pageSize = getpagesize();
     140    const unsigned segmentSize = blocksRequiredPerSegment * b->getBitBlockWidth();
     141    const auto bufferSize = std::max(pageSize * 8, segmentSize * 4);
     142    ConstantInt * const bufferItems = b->getSize(bufferSize);
     143    const auto codeUnitSize = codeUnitWidth / 8;
     144    ConstantInt * const bufferBytes = b->getSize(bufferSize * codeUnitSize);
     145    PointerType * const codeUnitPtrTy = b->getIntNTy(codeUnitWidth)->getPointerTo();
     146    Value * const buffer = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufferBytes), codeUnitPtrTy);
     147    b->setBaseAddress("sourceBuffer", buffer);
     148    b->setScalarField("buffer", buffer);
     149    b->setCapacity("sourceBuffer", bufferItems);
     150}
     151
     152void ReadSourceKernel::generateDoSegmentMethod(const unsigned codeUnitWidth, const unsigned blocksRequiredPerSegment, const std::unique_ptr<KernelBuilder> & b) {
     153
     154    const unsigned pageSize = getpagesize();
     155    const unsigned segmentSize = blocksRequiredPerSegment * b->getBitBlockWidth();
     156    ConstantInt * const itemsToRead = b->getSize(std::max(pageSize, segmentSize * 2));
     157    ConstantInt * const codeUnitBytes = b->getSize(codeUnitWidth / 8);
     158    ConstantInt * const itemsPerSegment = b->getSize(segmentSize);
     159
     160    BasicBlock * const entry = b->GetInsertBlock();
     161    BasicBlock * const checkData = b->CreateBasicBlock("CheckData");
     162    BasicBlock * const moveData = b->CreateBasicBlock("MoveData");
     163    BasicBlock * const prepareBuffer = b->CreateBasicBlock("PrepareBuffer");
     164    BasicBlock * const readData = b->CreateBasicBlock("ReadData");
     165    BasicBlock * const setTermination = b->CreateBasicBlock("SetTermination");
     166    BasicBlock * const readExit = b->CreateBasicBlock("ReadExit");
     167
     168    // Do we have enough unread data to support a segments worth of processing?
     169    Value * const produced = b->getProducedItemCount("sourceBuffer");
     170    Value * const buffered = b->getBufferedSize("sourceBuffer");
     171    Value * const itemsPending = b->CreateAdd(produced, itemsPerSegment);
     172
     173    b->CreateLikelyCondBr(b->CreateICmpULT(itemsPending, buffered), readExit, checkData);
     174
     175    // Can we append to our existing buffer without impacting any subsequent kernel?
     176    b->SetInsertPoint(checkData);
     177    Value * const capacity = b->getCapacity("sourceBuffer");
     178    Value * const readEnd = b->getRawOutputPointer("sourceBuffer", b->CreateAdd(buffered, itemsToRead));
     179    Value * const baseBuffer = b->getScalarField("buffer");
     180    Value * const bufferLimit = b->CreateGEP(baseBuffer, capacity);
     181    b->CreateLikelyCondBr(b->CreateICmpULE(readEnd, bufferLimit), readData, moveData);
    219182
    220183    // First wait on any consumers to finish processing then check how much data has been consumed.
    221     kb->SetInsertPoint(waitOnConsumers);
    222     kb->CreateConsumerWait();
     184    b->SetInsertPoint(moveData);
     185    b->CreateConsumerWait();
    223186
    224187    // Then determine how much data has been consumed and how much needs to be copied back, noting
    225188    // that our "unproduced" data must be block aligned.
    226     const size_t blockAlignment = kb->getBitBlockWidth() / 8;
    227     Constant * const alignmentMask = kb->getSize(-(blockAlignment * 8 / codeUnitWidth));
    228     Value * const consumed = kb->CreateAnd(kb->getConsumedItemCount("sourceBuffer"), alignmentMask);
    229     Value * const remaining = kb->CreateSub(bufferedSize, consumed);
    230     Value * const unconsumedPtr = kb->CreateGEP(inputStream, consumed);
    231     Value * const consumedMajority = kb->CreateICmpULT(kb->CreateGEP(buffer, remaining), unconsumedPtr);
    232     Value * target = buffer;
    233     Value * source = unconsumedPtr;
    234     Value * toCopy = remaining;
    235     if (codeUnitWidth != 8) {
    236         source = kb->CreatePointerCast(unconsumedPtr, i8PtrTy);
    237         toCopy = kb->CreateMul(remaining, codeUnitBytes);
    238     }
    239 
    240     BasicBlock * const copyBack = kb->CreateBasicBlock("CopyBack");
    241     BasicBlock * const expandAndCopyBack = kb->CreateBasicBlock("ExpandAndCopyBack");
    242     BasicBlock * const calculateLogicalAddress = kb->CreateBasicBlock("CalculateLogicalAddress");
    243 
    244     // Have we consumed enough data that we can safely copy back the unconsumed data without needing
    245     // a temporary buffer? (i.e., B + remaining < L + consumed)
    246     kb->CreateLikelyCondBr(consumedMajority, copyBack, expandAndCopyBack);
    247     kb->SetInsertPoint(copyBack);
     189    BasicBlock * const copyBack = b->CreateBasicBlock("CopyBack");
     190    BasicBlock * const expandAndCopyBack = b->CreateBasicBlock("ExpandAndCopyBack");
     191
     192    const auto blockSize = b->getBitBlockWidth() / 8;
     193    Constant * const blockSizeAlignmentMask = ConstantExpr::getNeg(b->getSize(blockSize));
     194    Value * const consumed = b->getConsumedItemCount("sourceBuffer");
     195    Value * const offset = b->CreateAnd(consumed, blockSizeAlignmentMask);
     196    Value * const unreadData = b->getRawOutputPointer("sourceBuffer", offset);
     197    Value * const remainingItems = b->CreateSub(buffered, offset);
     198    Value * const remainingBytes = b->CreateMul(remainingItems, codeUnitBytes);
     199
     200    // Have we consumed enough data that we can safely copy back the unconsumed data without needing a temporary buffer?
     201    Value * const canCopy = b->CreateICmpULT(b->CreateGEP(baseBuffer, remainingItems), b->getRawOutputPointer("sourceBuffer", offset));
     202    b->CreateLikelyCondBr(canCopy, copyBack, expandAndCopyBack);
     203
    248204    // If so, just copy the data ...
    249     if (codeUnitWidth != 8) {
    250         target = kb->CreatePointerCast(buffer, i8PtrTy);
    251     }
    252     kb->CreateMemCpy(target, source, toCopy, 1);
    253     kb->CreateBr(calculateLogicalAddress);
    254    
     205    b->SetInsertPoint(copyBack);
     206    b->CreateMemCpy(baseBuffer, unreadData, remainingBytes, blockSize);
     207    b->CreateBr(prepareBuffer);
     208
    255209    // Otherwise, allocate a buffer with twice the capacity and copy the unconsumed data back into it
    256     kb->SetInsertPoint(expandAndCopyBack);
    257     Value * const expandedCapacity = kb->CreateShl(capacity, 1);
    258     Value * const expandedBytes = codeUnitWidth == 8 ? expandedCapacity : kb->CreateMul(expandedCapacity, codeUnitBytes);
    259     Value * const expandedBuffer = kb->CreatePointerCast(kb->CreateCacheAlignedMalloc(expandedBytes), codeUnitPtrTy);
    260     target = codeUnitWidth == 8 ? expandedBuffer : kb->CreatePointerCast(expandedBuffer, i8PtrTy);
    261     kb->CreateMemCpy(target, source, toCopy, 1);
    262     kb->CreateFree(buffer);
    263     kb->setScalarField("buffer", expandedBuffer);
    264     kb->setScalarField("capacity", expandedCapacity);
    265     kb->setCapacity("sourceBuffer", expandedCapacity);
    266     kb->CreateBr(calculateLogicalAddress);
    267 
    268     // Update the logical address for this buffer....
    269     kb->SetInsertPoint(calculateLogicalAddress);
    270     PHINode * const baseAddress = kb->CreatePHI(codeUnitPtrTy, 2);
    271     baseAddress->addIncoming(buffer, copyBack);
    272     baseAddress->addIncoming(expandedBuffer, expandAndCopyBack);
    273     Value * const logicalAddress = kb->CreateGEP(baseAddress, kb->CreateNeg(consumed));
    274     Value * const modifiedPtr = kb->CreateGEP(baseAddress, remaining);
    275     kb->setBaseAddress("sourceBuffer", logicalAddress);
    276     kb->CreateBr(readData);
     210    b->SetInsertPoint(expandAndCopyBack);
     211    Value * const expandedCapacity = b->CreateShl(capacity, 1);
     212    Value * const expandedBytes = b->CreateMul(expandedCapacity, codeUnitBytes);
     213    Value * const expandedBuffer = b->CreatePointerCast(b->CreateCacheAlignedMalloc(expandedBytes), unreadData->getType());
     214    b->CreateMemCpy(expandedBuffer, unreadData, remainingBytes, blockSize);
     215    b->CreateFree(baseBuffer);
     216    b->setScalarField("buffer", expandedBuffer);
     217    b->setCapacity("sourceBuffer", expandedCapacity);
     218    b->CreateBr(prepareBuffer);
     219
     220    b->SetInsertPoint(prepareBuffer);
     221    PHINode * newBaseBuffer = b->CreatePHI(baseBuffer->getType(), 2);
     222    newBaseBuffer->addIncoming(baseBuffer, copyBack);
     223    newBaseBuffer->addIncoming(expandedBuffer, expandAndCopyBack);
     224    b->setBaseAddress("sourceBuffer", b->CreateGEP(newBaseBuffer, b->CreateNeg(offset)));
     225    b->CreateBr(readData);
    277226
    278227    // Regardless of whether we're simply appending data or had to allocate a new buffer, read a new page
    279     // of data into the input source buffer. If we fail to read a full segment ...
    280     readData->moveAfter(calculateLogicalAddress);
    281     kb->SetInsertPoint(readData);
    282     calculateLogicalAddress->moveAfter(calculateLogicalAddress);
    283     PHINode * const addr = kb->CreatePHI(codeUnitPtrTy, 2);
    284     addr->addIncoming(originalPtr, exhaustedBuffer);
    285     addr->addIncoming(modifiedPtr, calculateLogicalAddress);
    286     Value * const fd = kb->getScalarField("fileDescriptor");
    287 
    288     Value * itemsRead = kb->CreateReadCall(fd, addr, readSize);
    289     if (codeUnitWidth != 8) {
    290         itemsRead = kb->CreateUDiv(itemsRead, codeUnitBytes);
    291     }
    292     unreadSize = kb->CreateAdd(unreadSize, itemsRead);
    293     bufferedSize = kb->CreateAdd(bufferedSize, itemsRead);
    294     kb->setBufferedSize("sourceBuffer", bufferedSize);
    295     Value * const exhaustedInputSource = kb->CreateICmpULT(unreadSize, segmentSize);
    296     BasicBlock * const setTermination = kb->CreateBasicBlock("SetTermination");
    297     kb->CreateUnlikelyCondBr(exhaustedInputSource, setTermination, stdInExit);
    298 
    299     // ... zero out the remaining bytes and set the termination signal.
    300     kb->SetInsertPoint(setTermination);
    301     Value * bytesToZero = kb->CreateSub(segmentSize, unreadSize);
    302     Value * unreadPtr = kb->CreateGEP(addr, unreadSize);
    303     bytesToZero = codeUnitWidth == 8 ? bytesToZero : kb->CreateMul(bytesToZero, codeUnitBytes);
    304     if (codeUnitWidth != 8) {
    305         bytesToZero = kb->CreateMul(bytesToZero, codeUnitBytes);
    306         unreadPtr = kb->CreatePointerCast(unreadPtr, i8PtrTy);
    307     }
    308     kb->CreateMemZero(unreadPtr, bytesToZero);
    309     kb->setCapacity("sourceBuffer", bufferedSize);
    310     kb->setTerminationSignal(kb->CreateICmpEQ(unreadSize, Constant::getNullValue(itemsRead->getType())));
    311     kb->CreateBr(stdInExit);
    312 
    313     // finally add the segment item count to the produced item count to inform the subsequent kernels how
    314     // much data is available for processing
    315     kb->SetInsertPoint(stdInExit);
    316     stdInExit->moveAfter(setTermination);
    317     PHINode * const items = kb->CreatePHI(produced->getType(), 3);
    318     items->addIncoming(segmentSize, entryBlock);
    319     items->addIncoming(segmentSize, readData);
    320     items->addIncoming(unreadSize, setTermination);
    321     kb->setProducedItemCount("sourceBuffer", kb->CreateAdd(produced, items));
     228    // of data into the input source buffer. If we fail to read a full page ...
     229    b->SetInsertPoint(readData);
     230    Value * const sourceBuffer = b->getRawOutputPointer("sourceBuffer", buffered);
     231    Value * const fd = b->getScalarField("fileDescriptor");
     232    Constant * const bytesToRead = ConstantExpr::getMul(itemsToRead, codeUnitBytes);
     233    Value * const bytesRead = b->CreateReadCall(fd, sourceBuffer, bytesToRead);
     234    Value * const itemsRead = b->CreateUDiv(bytesRead, codeUnitBytes);
     235    b->CreateAssert(b->CreateICmpULE(itemsRead, itemsToRead), "read more items than expected");
     236    Value * const itemsBuffered = b->CreateAdd(buffered, itemsRead);
     237    b->setBufferedSize("sourceBuffer", itemsBuffered);
     238    b->CreateUnlikelyCondBr(b->CreateICmpULT(itemsBuffered, itemsPending), setTermination, readExit);
     239
     240    // ... set the termination signal.   
     241    b->SetInsertPoint(setTermination);
     242    Value * const bytesToZero = b->CreateMul(b->CreateSub(itemsPending, itemsBuffered), codeUnitBytes);
     243    b->CreateMemZero(b->getRawOutputPointer("sourceBuffer", itemsBuffered), bytesToZero);
     244    b->setTerminationSignal();
     245    b->CreateBr(readExit);
     246
     247    readExit->moveAfter(setTermination);
     248    b->SetInsertPoint(readExit);
     249    PHINode * const itemsProduced = b->CreatePHI(itemsPending->getType(), 3);
     250    itemsProduced->addIncoming(itemsPending, entry);
     251    itemsProduced->addIncoming(itemsPending, readData);
     252    itemsProduced->addIncoming(itemsBuffered, setTermination);
     253    b->setProducedItemCount("sourceBuffer", itemsProduced);
    322254}
    323255
     
    326258}
    327259
    328 ReadSourceKernel::ReadSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, unsigned blocksPerSegment, unsigned codeUnitWidth)
    329 : SegmentOrientedKernel("read_source"  + std::to_string(blocksPerSegment) + "@" + std::to_string(codeUnitWidth)
     260ReadSourceKernel::ReadSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned blocksRequiredPerSegment, const unsigned codeUnitWidth)
     261: SegmentOrientedKernel("read_source"  + std::to_string(blocksRequiredPerSegment) + "@" + std::to_string(codeUnitWidth)
    330262, {}
    331 , {Binding{kb->getStreamSetTy(1, codeUnitWidth), "sourceBuffer"}}
    332 , {Binding{kb->getInt32Ty(), "fileDescriptor"}}
     263, {Binding{b->getStreamSetTy(1, codeUnitWidth), "sourceBuffer", FixedRate(), Deferred()}}
     264, {Binding{b->getInt32Ty(), "fileDescriptor"}}
    333265, {}
    334 , {Binding{IntegerType::get(kb->getContext(), codeUnitWidth)->getPointerTo(), "buffer"}, Binding{kb->getSizeTy(), "capacity"}})
    335 , mSegmentBlocks(blocksPerSegment)
     266, {Binding{b->getIntNTy(codeUnitWidth)->getPointerTo(), "buffer"}})
     267, mBlocksRequiredPerSegment(blocksRequiredPerSegment)
    336268, mCodeUnitWidth(codeUnitWidth) {
    337269
     
    366298    kb->CreateCondBr(kb->CreateICmpEQ(kb->getScalarField("fileDescriptor"), kb->getInt32(STDIN_FILENO)), initializeRead, initializeMMap);
    367299    kb->SetInsertPoint(initializeRead);
    368     ReadSourceKernel::generateInitializeMethod(mCodeUnitWidth, kb);
     300    ReadSourceKernel::generateInitializeMethod(mCodeUnitWidth, mBlocksRequiredPerSegment, kb);
    369301    kb->CreateBr(initializeDone);
    370302    kb->SetInsertPoint(initializeMMap);
    371     MMapSourceKernel::generateInitializeMethod(mFileSizeFunction, mCodeUnitWidth, kb);
     303    MMapSourceKernel::generateInitializeMethod(mFileSizeFunction, mCodeUnitWidth, mBlocksRequiredPerSegment, kb);
    372304    kb->CreateBr(initializeDone);
    373305    kb->SetInsertPoint(initializeDone);
     
    381313    kb->CreateCondBr(kb->CreateICmpEQ(kb->getScalarField("fileDescriptor"), kb->getInt32(STDIN_FILENO)), DoSegmentRead, DoSegmentMMap);
    382314    kb->SetInsertPoint(DoSegmentRead);
    383     ReadSourceKernel::generateDoSegmentMethod(mCodeUnitWidth, mSegmentBlocks, kb);
     315    ReadSourceKernel::generateDoSegmentMethod(mCodeUnitWidth, mBlocksRequiredPerSegment, kb);
    384316    kb->CreateBr(DoSegmentDone);
    385317    kb->SetInsertPoint(DoSegmentMMap);
    386     MMapSourceKernel::generateDoSegmentMethod(mCodeUnitWidth, mSegmentBlocks, kb);
     318    MMapSourceKernel::generateDoSegmentMethod(mCodeUnitWidth, mBlocksRequiredPerSegment, kb);
    387319    kb->CreateBr(DoSegmentDone);
    388320    kb->SetInsertPoint(DoSegmentDone);
    389321}
    390322
    391 FDSourceKernel::FDSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, unsigned blocksPerSegment, unsigned codeUnitWidth)
    392 : SegmentOrientedKernel("FD_source" + std::to_string(blocksPerSegment) + "@" + std::to_string(codeUnitWidth)
     323FDSourceKernel::FDSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, const unsigned blocksRequiredPerSegment, const unsigned codeUnitWidth)
     324: SegmentOrientedKernel("FD_source" + std::to_string(blocksRequiredPerSegment) + "@" + std::to_string(codeUnitWidth)
    393325, {}
    394326, {Binding{kb->getStreamSetTy(1, codeUnitWidth), "sourceBuffer"}}
    395327, {Binding{kb->getInt32Ty(), "fileDescriptor"}}
    396328, {}
    397 , {Binding{IntegerType::get(kb->getContext(), codeUnitWidth)->getPointerTo(), "buffer"}, Binding{kb->getSizeTy(), "capacity"},
     329, {Binding{kb->getIntNTy(codeUnitWidth)->getPointerTo(), "buffer"},
    398330    Binding{kb->getSizeTy(), "fileSize"}, Binding{kb->getInt8PtrTy(), "readableBuffer"}})
    399 , mSegmentBlocks(blocksPerSegment)
     331, mBlocksRequiredPerSegment(blocksRequiredPerSegment)
    400332, mCodeUnitWidth(codeUnitWidth)
    401333, mFileSizeFunction(nullptr) {
     
    418350    BasicBlock * setTermination = kb->CreateBasicBlock("setTermination");
    419351    BasicBlock * mmapSourceExit = kb->CreateBasicBlock("sourceExit");
    420     ConstantInt * segmentItems = kb->getSize(mSegmentBlocks * kb->getBitBlockWidth());
     352    ConstantInt * segmentItems = kb->getSize(mBlocksRequiredPerSegment * kb->getBitBlockWidth());
    421353    Value * fileItems = kb->getScalarField("fileSize");
    422354    if (mCodeUnitWidth > 8) {
     
    439371}
    440372
    441 MemorySourceKernel::MemorySourceKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, Type * type, unsigned blocksPerSegment, unsigned codeUnitWidth)
     373MemorySourceKernel::MemorySourceKernel(const std::unique_ptr<kernel::KernelBuilder> & kb, Type * const type, const unsigned blocksRequiredPerSegment, const unsigned codeUnitWidth)
    442374: SegmentOrientedKernel("memory_source",
    443375    {},
    444376    {Binding{kb->getStreamSetTy(1, codeUnitWidth), "sourceBuffer"}},
    445377    {Binding{cast<PointerType>(type), "fileSource"}, Binding{kb->getSizeTy(), "fileSize"}}, {}, {})
    446 , mSegmentBlocks(blocksPerSegment)
     378, mBlocksRequiredPerSegment(blocksRequiredPerSegment)
    447379, mCodeUnitWidth(codeUnitWidth) {
    448380
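Caller-side note (editorial; not part of the changeset): the constructor hunks above rename blocksPerSegment to blocksRequiredPerSegment, mark the parameters const, and give ReadSourceKernel's sourceBuffer output FixedRate() and Deferred() attributes. The sketch below shows how a pipeline might instantiate these kernels after the rename. The include paths, the `kernel` namespace, buildSources(), and the concrete values (64 blocks per segment, 8-bit code units) are assumptions for illustration only; the constructor signatures themselves are taken from this diff.

    // Hedged caller-side sketch; paths, namespace and values are assumptions.
    #include <kernels/source_kernel.h>   // assumed include path
    #include <llvm/IR/Type.h>
    #include <memory>

    void buildSources(const std::unique_ptr<kernel::KernelBuilder> & b,
                      llvm::Type * const int8PtrTy) {
        const unsigned blocksRequiredPerSegment = 64; // assumed segment size
        const unsigned codeUnitWidth = 8;             // assumed code-unit width

        // stdin-vs-file dispatching source (see the DoSegment branching hunk above)
        std::unique_ptr<kernel::FDSourceKernel> fdSource(
            new kernel::FDSourceKernel(b, blocksRequiredPerSegment, codeUnitWidth));

        // memory-backed source; the type argument must be a pointer type,
        // per the cast<PointerType>(type) in the MemorySourceKernel constructor
        std::unique_ptr<kernel::MemorySourceKernel> memSource(
            new kernel::MemorySourceKernel(b, int8PtrTy, blocksRequiredPerSegment, codeUnitWidth));

        // both trailing parameters are defaulted (1 and 8) in source_kernel.h below
        std::unique_ptr<kernel::MMapSourceKernel> mmapSource(
            new kernel::MMapSourceKernel(b));

        (void) fdSource; (void) memSource; (void) mmapSource;
    }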
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.h

    r5706 r5757  
    1414    friend class FDSourceKernel;
    1515public:
    16     MMapSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
     16    MMapSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const unsigned blocksRequiredPerSegment = 1, const unsigned codeUnitWidth = 8);
    1717    bool isCachable() const override { return true; }
    1818    bool hasSignature() const override { return false; }
     
    2121    }
    2222    void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override {
    23         generateInitializeMethod(mFileSizeFunction, mCodeUnitWidth, iBuilder);
     23        generateInitializeMethod(mFileSizeFunction, mCodeUnitWidth, mBlocksRequiredPerSegment, iBuilder);
    2424    }
    2525    void generateDoSegmentMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override {
    26         generateDoSegmentMethod(mCodeUnitWidth, mSegmentBlocks, iBuilder);
     26        generateDoSegmentMethod(mCodeUnitWidth, mBlocksRequiredPerSegment, iBuilder);
    2727    }
    2828    void generateFinalizeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override {
     
    3131protected:
    3232    static llvm::Function * linkFileSizeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    33     static void generateInitializeMethod(llvm::Function * fileSize, const unsigned codeUnitWidth, const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    34     static void generateDoSegmentMethod(const unsigned codeUnitWidth, const unsigned segmentBlocks, const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     33    static void generateInitializeMethod(llvm::Function * fileSize, const unsigned codeUnitWidth, const unsigned blocksRequiredPerSegment, const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     34    static void generateDoSegmentMethod(const unsigned codeUnitWidth, const unsigned blocksRequiredPerSegment, const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    3535    static void unmapSourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    3636protected:
    37     const unsigned          mSegmentBlocks;
    38     const unsigned          mCodeUnitWidth;
    39     llvm::Function *        mFileSizeFunction;
     37    const unsigned   mBlocksRequiredPerSegment;
     38    const unsigned   mCodeUnitWidth;
     39    llvm::Function * mFileSizeFunction;
    4040};
    4141
     
    4343    friend class FDSourceKernel;
    4444public:
    45     ReadSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
     45    ReadSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const unsigned blocksRequiredPerSegment = 1, const unsigned codeUnitWidth = 8);
    4646    bool isCachable() const override { return true; }
    4747    bool hasSignature() const override { return false; }
    4848    void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override {
    49         generateInitializeMethod(mCodeUnitWidth, iBuilder);
     49        generateInitializeMethod(mCodeUnitWidth, mBlocksRequiredPerSegment, iBuilder);
    5050    }
    5151    void generateDoSegmentMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override {
    52         generateDoSegmentMethod(mCodeUnitWidth, mSegmentBlocks, iBuilder);
     52        generateDoSegmentMethod(mCodeUnitWidth, mBlocksRequiredPerSegment, iBuilder);
    5353    }
    5454    void generateFinalizeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override {
     
    5656    }
    5757protected:
    58     static void generateInitializeMethod(const unsigned codeUnitWidth, const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    59     static void generateDoSegmentMethod(const unsigned codeUnitWidth, const unsigned segmentBlocks, const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     58    static void generateInitializeMethod(const unsigned codeUnitWidth, const unsigned blocksRequiredPerSegment, const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     59    static void generateDoSegmentMethod(const unsigned codeUnitWidth, const unsigned blocksRequiredPerSegment, const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    6060    static void freeBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    6161private:
    62     unsigned mSegmentBlocks;
    63     unsigned mCodeUnitWidth;
     62    const unsigned mBlocksRequiredPerSegment;
     63    const unsigned mCodeUnitWidth;
    6464};
    6565
    6666class FDSourceKernel final : public SegmentOrientedKernel {
    6767public:
    68     FDSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
     68    FDSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const unsigned blocksRequiredPerSegment = 1, const unsigned codeUnitWidth = 8);
    6969    bool isCachable() const override { return true; }
    7070    bool hasSignature() const override { return false; }
     
    7474    void generateFinalizeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    7575protected:
    76     const unsigned          mSegmentBlocks;
    77     const unsigned          mCodeUnitWidth;
    78     llvm::Function *        mFileSizeFunction;
     76    const unsigned mBlocksRequiredPerSegment;
     77    const unsigned mCodeUnitWidth;
     78    llvm::Function * mFileSizeFunction;
    7979};
    8080   
    8181class MemorySourceKernel final : public SegmentOrientedKernel {
    8282public:
    83     MemorySourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::Type * type, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
     83    MemorySourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::Type * const type, const unsigned blocksRequiredPerSegment = 1, const unsigned codeUnitWidth = 8);
    8484    bool hasSignature() const override { return false; }
    8585protected:
     
    8787    void generateDoSegmentMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    8888private:
    89     unsigned mSegmentBlocks;
    90     unsigned mCodeUnitWidth;
     89    const unsigned mBlocksRequiredPerSegment;
     90    const unsigned mCodeUnitWidth;
    9191};
    9292
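Design note (editorial): source_kernel.h keeps the pattern in which the virtual generateInitializeMethod/generateDoSegmentMethod overrides forward to static helpers, which now also take blocksRequiredPerSegment explicitly; FDSourceKernel, a friend of both kernels, reuses those helpers in the stdin-vs-mmap dispatch shown in the source_kernel.cpp hunks above. Below is a minimal standalone sketch of that pattern; the names ReadLikeKernel, FdDispatcher and Builder are illustrative and not from the icgrep tree.

    struct Builder {};  // stand-in for kernel::KernelBuilder

    class ReadLikeKernel {
        friend class FdDispatcher;                 // analogous to FDSourceKernel
    public:
        ReadLikeKernel(const unsigned blocksRequiredPerSegment, const unsigned codeUnitWidth)
        : mBlocksRequiredPerSegment(blocksRequiredPerSegment)
        , mCodeUnitWidth(codeUnitWidth) {}

        // virtual hook in the real kernel: forwards the per-instance configuration
        void generateDoSegmentMethod(Builder & b) {
            generateDoSegmentMethod(mCodeUnitWidth, mBlocksRequiredPerSegment, b);
        }
    protected:
        // static helper: reusable by the friend class without virtual dispatch
        static void generateDoSegmentMethod(const unsigned /*codeUnitWidth*/,
                                            const unsigned /*blocksRequiredPerSegment*/,
                                            Builder & /*b*/) {
            // IR generation would go here
        }
    private:
        const unsigned mBlocksRequiredPerSegment;  // const members, as in r5757
        const unsigned mCodeUnitWidth;
    };

Passing the configuration as explicit arguments keeps the helpers stateless, so the dispatching kernel can call either implementation directly.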
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5755 r5757  
    102102}
    103103
    104 Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
    105     return getBufferedSize(b, handle);
     104Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
     105    return b->getSize(mBufferBlocks * b->getBitBlockWidth());
    106106}
    107107
     
    330330}
    331331
     332Value * ExternalBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
     333    return ConstantInt::getAllOnesValue(b->getSizeTy());
     334}
     335
     336
    332337// Circular Buffer
    333338Value * CircularBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * const blockIndex) const {
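Semantics note (editorial): with this change, StreamSetBuffer::getCapacity reports the fixed allocation size in stream items (mBufferBlocks times the bit-block width) instead of delegating to getBufferedSize, and the new ExternalBuffer::getCapacity override reports an all-ones size value, i.e. an effectively unbounded, caller-managed buffer. The host-side sketch below shows the constants these overrides emit; the block count and bit-block width are assumed values, only the formulas come from the diff.

    // Hedged host-side sketch; 8 blocks and a 256-bit block width are assumptions.
    #include <cstddef>
    #include <cstdint>
    #include <cassert>

    int main() {
        const std::size_t bufferBlocks  = 8;    // assumed mBufferBlocks
        const std::size_t bitBlockWidth = 256;  // assumed b->getBitBlockWidth()

        // StreamSetBuffer::getCapacity: fixed allocation size in stream items,
        // independent of the (now ignored) handle argument
        const std::size_t internalCapacity = bufferBlocks * bitBlockWidth;

        // ExternalBuffer::getCapacity: all bits of the size type set,
        // signalling an effectively unbounded external buffer
        const std::size_t externalCapacity = ~std::size_t{0};

        assert(internalCapacity == 2048);
        assert(externalCapacity == SIZE_MAX);
        return 0;
    }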
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5755 r5757  
    206206    void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
    207207
     208    llvm::Value * getCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    208209};
    209210