Ignore:
Timestamp:
Feb 6, 2018, 4:57:35 PM (16 months ago)
Author:
nmedfort
Message:

More work on the pipeline I/O rate handling

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5856 r5865  
    214 214 void Kernel::addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & idb) {
    215215
    216     if (mStreamMap.empty()) {
    217         prepareStreamSetNameMap();
    218     }
     216    assert (mStreamMap.empty());
     217
     218    prepareStreamSetNameMap();
    219219
    220220    normalizeStreamProcessingRates();
     
    290290    if (LLVM_UNLIKELY(hasSignature())) {
    291291        generateKernel(idb);
    292         std::string signature;
    293         raw_string_ostream OS(signature);
    294         WriteBitcodeToFile(getModule(), OS);
    295         return signature;
     292        std::string tmp;
     293        raw_string_ostream signature(tmp);
     294        WriteBitcodeToFile(getModule(), signature);
     295        return signature.str();
    296296    } else {
    297297        return getModule()->getModuleIdentifier();
     
    304304 ** ------------------------------------------------------------------------------------------------------------- */
    305 305 void Kernel::generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb) {
    306     assert ("KernelBuilder does not have a valid IDISA Builder" && idb.get());
    307     // If the module id cannot uniquely identify this kernel, "generateKernelSignature()" will have already
    308     // generated the unoptimized IR.
    309     if (!mIsGenerated) {
    310         const auto m = idb->getModule();
    311         const auto ip = idb->saveIP();
    312         // const auto saveInstance = getInstance();
    313         idb->setModule(mModule);
    314         addKernelDeclarations(idb);
    315         callGenerateInitializeMethod(idb);
    316         callGenerateDoSegmentMethod(idb);
    317         callGenerateFinalizeMethod(idb);
    318         // setInstance(saveInstance);
    319         idb->setModule(m);
    320         idb->restoreIP(ip);
    321         mIsGenerated = true;
    322     }
     306    assert ("Kernel does not have a valid IDISA Builder" && idb.get());
     307    if (LLVM_UNLIKELY(mIsGenerated)) return;
     308    idb->setModule(mModule);
     309    addKernelDeclarations(idb);
     310    callGenerateInitializeMethod(idb);
     311    callGenerateDoSegmentMethod(idb);
     312    callGenerateFinalizeMethod(idb);
     313    mIsGenerated = true;
    323 314 }
    324315
     
    685 676 }
    686677
     678 // #define DEBUG_LOG
     679
    687 680 /** ------------------------------------------------------------------------------------------------------------- *
    688681 * @brief generateKernelMethod
     
    710703            Type * const ty = mStreamSetInputBuffers[i]->getStreamSetBlockType();
    711704            auto ub = getUpperBound(rate);
     705            assert (ub != 0);
    712706            if (LLVM_UNLIKELY(input.hasLookahead())) {
    713707                ub += RateValue(input.getLookahead(), mStride);
     
    727721        if (requiresTemporaryOutputBuffer(output, rate)) {
    728722            auto ub = getUpperBound(rate);
    729             if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i]->supportsCopyBack() && requiresCopyBack(rate))) {
    730                 ub += mStreamSetOutputBuffers[i]->overflowSize();
    731             }
    732             Type * const ty = mStreamSetOutputBuffers[i]->getStreamSetBlockType();
    733             Constant * const arraySize = b->getInt64(ceiling(ub));
    734             AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize);
    735             assert (ptr->isStaticAlloca());
    736             temporaryOutputBuffer[i] = ptr;
     723            if (ub > 0) {
     724                if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i]->supportsCopyBack() && requiresCopyBack(rate))) {
     725                    ub += mStreamSetOutputBuffers[i]->overflowSize();
     726                }
     727                Type * const ty = mStreamSetOutputBuffers[i]->getStreamSetBlockType();
     728                Constant * const arraySize = b->getInt64(ceiling(ub));
     729                AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize);
     730                assert (ptr->isStaticAlloca());
     731                temporaryOutputBuffer[i] = ptr;
     732            }
    737733        }
    738734    }
     
    762758
    763759    Value * const initiallyFinal = mIsFinal;
    764 
    765 //    b->CallPrintInt(getName() + "_initiallyFinal", initiallyFinal);
    766 
     760    #ifdef DEBUG_LOG
     761    b->CallPrintInt(getName() + "_initiallyFinal", initiallyFinal);
     762    #endif
    767763    // Now proceed with creation of the doSegment method.
    768764    BasicBlock * const segmentLoop = b->CreateBasicBlock("SegmentLoop");
     
    791787        const auto & name = input.getName();
    792788        Value * const processed = b->getProcessedItemCount(name);
    793 
    794 //        b->CallPrintInt(getName() + "_" + name + "_avail", mAvailableItemCount[i]);
    795 //        b->CallPrintInt(getName() + "_" + name + "_processed", processed);
    796 
     789        #ifdef DEBUG_LOG
     790        b->CallPrintInt(getName() + "_" + name + "_avail", mAvailableItemCount[i]);
     791        b->CallPrintInt(getName() + "_" + name + "_processed", processed);
     792        #endif
    797793        mInitialProcessedItemCount[i] = processed;
    798794        mStreamSetInputBaseAddress[i] = b->getBlockAddress(name, b->CreateLShr(processed, LOG_2_BLOCK_WIDTH));
     
    802798        }
    803799
    804         Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], processed);       
    805 //        b->CallPrintInt(getName() + "_" + name + "_unprocessed", unprocessed);
    806 
     800        Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], processed);
     801        #ifdef DEBUG_LOG
     802        b->CallPrintInt(getName() + "_" + name + "_unprocessed", unprocessed);
     803        #endif
    807804        Value * const accessible = b->getLinearlyAccessibleItems(name, processed, unprocessed);
    808 //        b->CallPrintInt(getName() + "_" + name + "_accessible", accessible);
    809 
     805        #ifdef DEBUG_LOG
     806        b->CallPrintInt(getName() + "_" + name + "_accessible", accessible);
     807        #endif
    810808        mAvailableItemCount[i] = unprocessed;
    811 
    812809        linearlyAccessible[i] = accessible;
    813810        inputStrideSize[i] = getStrideSize(b, input.getRate());
     
    926923        const auto & name = output.getName();
    927924        Value * const produced = b->getProducedItemCount(name);
    928 //        b->CallPrintInt(getName() + "_" + name + "_produced", produced);
    929 
     925        #ifdef DEBUG_LOG
     926        b->CallPrintInt(getName() + "_" + name + "_produced", produced);
     927        #endif
    930928        Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(produced, LOG_2_BLOCK_WIDTH));
    931929        mInitialProducedItemCount[i] = produced;
    932930        mStreamSetOutputBaseAddress[i] = baseBuffer;
    933 
     931        linearlyWritable[i] = nullptr;
    934932        // Is the number of linearly writable items sufficient for a stride?
    935933        outputStrideSize[i] = getStrideSize(b, output.getRate());
     
    986984        const ProcessingRate & rate = input.getRate();
    987985        if (rate.isFixed() && input.nonDeferred()) {
    988 //            b->CallPrintInt(getName() + "_" + input.getName() + "_processed (+)", mAvailableItemCount[i]);
    989986            Value * const ic = b->CreateAdd(mInitialProcessedItemCount[i], mAvailableItemCount[i]);
    990987            b->setProcessedItemCount(input.getName(), ic);
     
    998995            Value * const produced = b->CreateMul(numOfStrides, outputStrideSize[i]);
    999996            Value * const ic = b->CreateAdd(mInitialProducedItemCount[i], produced);
    1000 //            b->CallPrintInt(getName() + "_" + output.getName() + "_produced (+)", produced);
    1001997            b->setProducedItemCount(output.getName(), ic);
    1002998        }
     
    10261022    // Copy back data to the actual output buffers.
    10271023    for (unsigned i = 0; i < outputSetCount; i++) {
    1028 
    10291024        AllocaInst * const tempBuffer = temporaryOutputBuffer[i];
    10301025        if (LLVM_UNLIKELY(tempBuffer == nullptr)) {
    10311026            continue;
    10321027        }
    1033 
    10341028        const auto & name = mStreamSetOutputs[i].getName();
    10351029        Value * const produced = b->getProducedItemCount(name);
     
    10481042        //Value * const newProducedItemCount = b->getProducedItemCount(name);
    10491043        Value * const newlyProduced = b->CreateSub(produced, mInitialProducedItemCount[i]);
     1044
     1045
    10501046        Value * const toWrite = b->CreateUMin(newlyProduced, linearlyWritable[i]);
    10511047        const auto alignment = getItemAlignment(mStreamSetOutputs[i]);
     
    11041100        const auto & name = mStreamSetOutputs[i].getName();
    11051101        Value * const produced = b->getProducedItemCount(name);
     1102
    11061103        // If this output has a Fixed/Bounded rate, determine whether we have room for another stride.
    11071104        if (LLVM_LIKELY(outputStrideSize[i] != nullptr)) {
     
    11121109            }
    11131110            Value * const unconsumed = b->CreateSub(produced, consumed);
     1111
     1112//            b->CallPrintInt(getName() + "_" + name + "_unconsumed", unconsumed);
     1113
    11141114            Value * const capacity = b->getBufferedSize(name);
     1115
     1116//            b->CallPrintInt(getName() + "_" + name + "_capacity", capacity);
     1117
    11151118            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    11161119                b->CreateAssert(b->CreateICmpULE(unconsumed, capacity),
    1117                                 getName() + ": " + name + " unconsumed data exceeds capacity");
    1118             }
     1120                                getName() + ": " + name + " more data was written than its capacity allows");
     1121            }
     1122
     1123
     1124
    11191125            Value * const remaining = b->CreateSub(capacity, unconsumed);
    11201126            Value * const hasRemainingStrides = b->CreateICmpUGE(remaining, outputStrideSize[i]);
Note: See TracChangeset for help on using the changeset viewer.