Ignore:
Timestamp:
Feb 2, 2018, 2:49:08 PM (15 months ago)
Author:
nmedfort
Message:

Revised pipeline structure to better control I/O rates

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5852 r5856  
    761761    mStreamSetInputBaseAddress.resize(inputSetCount);
    762762
     763    Value * const initiallyFinal = mIsFinal;
     764
     765//    b->CallPrintInt(getName() + "_initiallyFinal", initiallyFinal);
     766
    763767    // Now proceed with creation of the doSegment method.
    764768    BasicBlock * const segmentLoop = b->CreateBasicBlock("SegmentLoop");
     
    787791        const auto & name = input.getName();
    788792        Value * const processed = b->getProcessedItemCount(name);
     793
     794//        b->CallPrintInt(getName() + "_" + name + "_avail", mAvailableItemCount[i]);
     795//        b->CallPrintInt(getName() + "_" + name + "_processed", processed);
     796
    789797        mInitialProcessedItemCount[i] = processed;
    790798        mStreamSetInputBaseAddress[i] = b->getBlockAddress(name, b->CreateLShr(processed, LOG_2_BLOCK_WIDTH));
     
    793801                            getName() + ": " + name + " processed item count exceeds its available item count");
    794802        }
    795         Value * const unprocessed = b->CreateNUWSub(mAvailableItemCount[i], processed);
     803
     804        Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], processed);       
     805//        b->CallPrintInt(getName() + "_" + name + "_unprocessed", unprocessed);
     806
     807        Value * const accessible = b->getLinearlyAccessibleItems(name, processed, unprocessed);
     808//        b->CallPrintInt(getName() + "_" + name + "_accessible", accessible);
     809
    796810        mAvailableItemCount[i] = unprocessed;
    797         Value * const accessible = b->getLinearlyAccessibleItems(name, processed, unprocessed);
     811
    798812        linearlyAccessible[i] = accessible;
    799813        inputStrideSize[i] = getStrideSize(b, input.getRate());
     
    809823    // P is the current processed position, L is the lookahead amount and n is our number of accessible strides ∈ ℤ+.
    810824    b->SetInsertPoint(checkInputAvailability);
    811     Value * const initiallyFinal = mIsFinal;
    812825    Value * linearlyCopyable[inputSetCount];
    813826    PHINode * selectedInputBuffer[inputSetCount];
     
    831844            if (LLVM_UNLIKELY(input.hasLookahead())) {
    832845                Constant * const lookahead = b->getSize(input.getLookahead());
    833                 strideSize = b->CreateNUWAdd(strideSize, lookahead);
     846                strideSize = b->CreateAdd(strideSize, lookahead);
    834847            }
    835848            Value * const requiresCopy = b->CreateICmpULT(accessible, strideSize);
     
    913926        const auto & name = output.getName();
    914927        Value * const produced = b->getProducedItemCount(name);
     928//        b->CallPrintInt(getName() + "_" + name + "_produced", produced);
     929
    915930        Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(produced, LOG_2_BLOCK_WIDTH));
    916931        mInitialProducedItemCount[i] = produced;
     
    935950                b->SetInsertPoint(prepareTempBuffer);
    936951                Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize());
    937                 b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);
     952                b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);               
    938953                b->CreateBr(resume);
    939954                // Select the appropriate buffer / stride #
     
    971986        const ProcessingRate & rate = input.getRate();
    972987        if (rate.isFixed() && input.nonDeferred()) {
     988//            b->CallPrintInt(getName() + "_" + input.getName() + "_processed (+)", mAvailableItemCount[i]);
    973989            Value * const ic = b->CreateAdd(mInitialProcessedItemCount[i], mAvailableItemCount[i]);
    974990            b->setProcessedItemCount(input.getName(), ic);
     
    981997        if (rate.isFixed()) {
    982998            Value * const produced = b->CreateMul(numOfStrides, outputStrideSize[i]);
    983             Value * const ic = b->CreateNUWAdd(mInitialProducedItemCount[i], produced);
     999            Value * const ic = b->CreateAdd(mInitialProducedItemCount[i], produced);
     1000//            b->CallPrintInt(getName() + "_" + output.getName() + "_produced (+)", produced);
    9841001            b->setProducedItemCount(output.getName(), ic);
    9851002        }
     
    10301047        Value * const offset = b->CreateAnd(mInitialProducedItemCount[i], BLOCK_WIDTH_MASK);
    10311048        //Value * const newProducedItemCount = b->getProducedItemCount(name);
    1032         Value * const newlyProduced = b->CreateNUWSub(produced, mInitialProducedItemCount[i]);
     1049        Value * const newlyProduced = b->CreateSub(produced, mInitialProducedItemCount[i]);
    10331050        Value * const toWrite = b->CreateUMin(newlyProduced, linearlyWritable[i]);
    10341051        const auto alignment = getItemAlignment(mStreamSetOutputs[i]);
     
    10381055
    10391056        b->SetInsertPoint(copyToFront);
    1040         Value * const remaining = b->CreateNUWSub(newlyProduced, toWrite);
     1057        Value * const remaining = b->CreateSub(newlyProduced, toWrite);
    10411058        Value * const baseAddress = b->getBaseAddress(name);
    10421059        b->CreateStreamCpy(name, baseAddress, ZERO, tempBuffer, toWrite, remaining, alignment);
     
    10481065    //  We've dealt with the partial block processing and copied information back into the
    10491066    //  actual buffers.  If this isn't the final block, loop back for more multiblock processing.
    1050     BasicBlock * const setTermination = b->CreateBasicBlock("setTermination");
    1051     b->CreateCondBr(mIsFinal, setTermination, strideDone);
    1052     b->SetInsertPoint(setTermination);
    1053     b->setTerminationSignal();
    10541067    BasicBlock * const segmentDone = b->CreateBasicBlock("SegmentDone");
    1055     b->CreateBr(segmentDone);
     1068    if (canTerminateEarly()) {
     1069        mIsFinal = b->CreateOr(mIsFinal, b->getTerminationSignal());
     1070    }
     1071    b->CreateCondBr(mIsFinal, segmentDone, strideDone);
    10561072
    10571073    /// STRIDE DONE
     
    10661082        Value * const avail = mInitialAvailableItemCount[i];
    10671083        Value * const processed = b->getProcessedItemCount(name);
     1084//        b->CallPrintInt(getName() + "_" + name + "_processed'", processed);
     1085
    10681086        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    10691087            b->CreateAssert(b->CreateICmpULE(processed, avail), getName() + ": " + name + " processed data exceeds available data");
    10701088        }
    1071         Value * remaining = b->CreateSub(avail, processed);
     1089        Value * const remaining = b->CreateSub(avail, processed);
    10721090        Value * strideSize = inputStrideSize[i];
    10731091        if (LLVM_UNLIKELY(input.hasLookahead())) {
    1074             strideSize = b->CreateNUWAdd(strideSize, b->getSize(input.getLookahead()));
     1092            strideSize = b->CreateAdd(strideSize, b->getSize(input.getLookahead()));
    10751093        }
    10761094        Value * const hasRemainingStrides = b->CreateICmpUGE(remaining, strideSize);
    10771095        hasMoreStrides = b->CreateAnd(hasMoreStrides, hasRemainingStrides);
    10781096    }
     1097
    10791098    // even if we do not have enough input data for a full stride, if this is our final stride, allow it ...
    10801099    hasMoreStrides = b->CreateOr(hasMoreStrides, initiallyFinal);
     
    10931112            }
    10941113            Value * const unconsumed = b->CreateSub(produced, consumed);
    1095             Value * const capacity = b->getCapacity(name);
     1114            Value * const capacity = b->getBufferedSize(name);
    10961115            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    10971116                b->CreateAssert(b->CreateICmpULE(unconsumed, capacity),
     
    11131132
    11141133            b->SetInsertPoint(copyBack);
    1115             Value * const baseAddress = b->getBaseAddress(name);
    1116             const StreamSetBuffer * const buf = this->getAnyStreamSetBuffer(name);
    1117             const auto numOfStreams = buf->getNumOfStreams();
    1118             const auto itemWidth = getItemWidth(this->getBinding(name));
    1119 
    1120             const auto sizeByBit = b->CreateMul(b->CreateMul(b->getSize(itemWidth), bufferSize), b->getSize(numOfStreams));
    1121             const auto sizeByByte = b->CreateUDiv(sizeByBit, b->getSize(8));
    1122             const auto sourcePtr = b->CreateGEP(b->CreatePointerCast(baseAddress, b->getInt8PtrTy()), sizeByByte);
    1123             const auto targetPtr = b->CreatePointerCast(baseAddress, b->getInt8PtrTy());
    1124 
    1125             const auto itemsToBeCopyByBit = b->CreateMul(b->CreateMul(b->getSize(itemWidth), current), b->getSize(numOfStreams));
    1126             const auto itemsToBeCopyByByte = b->CreateUDiv(itemsToBeCopyByBit, b->getSize(8));
    1127             b->CreateMemCpy(targetPtr, sourcePtr, itemsToBeCopyByByte, 8);
    1128 
     1134            const auto copyAlignment = getItemAlignment(mStreamSetOutputs[i]);
     1135            Value * const startOfBuffer = b->getBaseAddress(name);
     1136            Value * const offset = b->CreateUDiv(bufferSize, b->getSize(b->getBitBlockWidth()));
     1137            Value * const endOfBuffer = b->CreateGEP(startOfBuffer, offset);
     1138            b->CreateStreamCpy(name, startOfBuffer, ZERO, endOfBuffer, ZERO, current, copyAlignment);
    11291139            b->CreateBr(done);
    11301140
     
    16181628, mCurrentMethod(nullptr)
    16191629, mAvailablePrincipalItemCount(nullptr)
    1620 , mIsGenerated(false)
    16211630, mStride(0)
    16221631, mIsFinal(nullptr)
    1623 , mOutputScalarResult(nullptr) {
     1632, mOutputScalarResult(nullptr)
     1633, mIsGenerated(false) {
    16241634
    16251635}
Note: See TracChangeset for help on using the changeset viewer.