Changeset 6266


Ignore:
Timestamp:
Jan 3, 2019, 5:13:04 PM (3 months ago)
Author:
nmedfort
Message:

Refactoring of termination logic to use counters.

Location:
icGREP/icgrep-devel/icgrep
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/icgrep.files

    r6253 r6266  
    33kernels/optimizationbranch.cpp
    44kernels/optimizationbranch.h
     5kernels/pipeline/termination_logic.hpp
    56wc.cpp
    67base64.cpp
  • icGREP/icgrep-devel/icgrep/kernels/optimizationbranch.cpp

    r6261 r6266  
    11#include "optimizationbranch.h"
    22#include <kernels/kernel_builder.h>
     3#include <boost/scoped_ptr.hpp>
    34
    45#warning at compilation, this must verify that the I/O rates of the branch permits the rates of the branches
     
    6768 ** ------------------------------------------------------------------------------------------------------------- */
    6869void OptimizationBranch::callKernel(const std::unique_ptr<KernelBuilder> & b,
    69                                     const Kernel * const kernel, std::vector<Value *> & args,
     70                                    const Kernel * const kernel,
     71                                    Value * const first, Value * const last,
    7072                                    PHINode * const terminatedPhi) {
    71     args[0] = kernel->getHandle();
     73#if 0
     74    std::vector<Value *> args;
     75    args.reserve(mCurrentMethod->arg_size());
     76    args.push_back(kernel->getHandle()); // handle
     77    args.push_back(b->CreateSub(last, first)); // numOfStrides
     78    const auto numOfInputs = kernel->getNumOfStreamInputs();
     79    for (unsigned i = 0; i < numOfInputs; i++) {
     80
     81        const Binding & input = kernel->getInputStreamSetBinding(i);
     82        const StreamSetBuffer * const buffer = mStreamSetInputBuffers[i];
     83        // logical base input address
     84        args.push_back(buffer->getBaseAddress(b.get()));
     85        // processed input items
     86        args.push_back(mProcessedInputItems[i]);
     87        // accessible input items (after non-deferred processed item count)
     88        args.push_back();
     89        // TODO: What if one of the branches requires this but the other doesn't?
     90        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
     91            args.push_back(b->CreateGEP(mPopCountRateArray[i], first));
     92        }
     93        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
     94            args.push_back(b->CreateGEP(mNegatedPopCountRateArray[i], first));
     95        }
     96    }
     97
     98    const auto numOfOutputs = kernel->getNumOfStreamOutputs();
     99    for (unsigned i = 0; i < numOfOutputs; ++i) {
     100        const Binding & output = kernel->getOutputStreamSetBinding(i);
     101        const auto unmanaged = !output.hasAttribute(AttrId::ManagedBuffer);
     102        if (unmanaged) {
     103            const StreamSetBuffer * const buffer = mStreamSetOutputBuffers[i];
     104            args.push_back(buffer->getBaseAddress(b.get()));
     105        }
     106        args.push_back(mProducedOutputItems[i]);
     107        if (unmanaged) {
     108            // writable
     109        } else {
     110            // consumed
     111        }
     112    }
     113
    72114    Value * terminated = b->CreateCall(kernel->getDoSegmentFunction(b->getModule()), args);
    73115    if (terminatedPhi) {
     
    77119        terminatedPhi->addIncoming(terminated, b->GetInsertBlock());
    78120    }
     121#endif
    79122}
    80123
     
    94137    Constant * const ONE = b->getSize(1);
    95138
    96     const auto numOfInputs = getNumOfStreamInputs();
     139    const auto numOfConditionInputs = isa<StreamSet>(mCondition) ? 1 : 0;
     140    const auto numOfInputs = getNumOfStreamInputs() - numOfConditionInputs;
    97141    std::vector<Value *> initialInputItems(numOfInputs, nullptr);
     142
     143    mPartialAccessibleInputItems.resize(numOfInputs);
     144
    98145    for (unsigned i = 0; i < numOfInputs; ++i) {
    99         if (isParamConstant(mInputStreamSets[i])) {
     146        if (isParamAddressable(mInputStreamSets[i])) {
    100147            initialInputItems[i] = b->CreateLoad(mProcessedInputItemPtr[i]);
    101148        }
     149        mPartialAccessibleInputItems[i] = mAccessibleInputItems[i];
    102150    }
    103151
     
    105153    std::vector<Value *> initialOutputItems(numOfOutputs, nullptr);
    106154    for (unsigned i = 0; i < numOfOutputs; ++i) {
    107         if (isParamConstant(mOutputStreamSets[i])) {
     155        if (isParamAddressable(mOutputStreamSets[i])) {
    108156            initialOutputItems[i] = b->CreateLoad(mProducedOutputItemPtr[i]);
    109157        }
     158        mPartialAccessibleInputItems[i] = mAccessibleInputItems[i];
    110159    }
    111160
     
    113162    b->CreateBr(loopCond);
    114163
    115     std::vector<Value *> args;
    116164    PHINode * terminatedPhi = nullptr;
    117165    if (canSetTerminateSignal()) {
     
    121169
    122170    b->SetInsertPoint(loopCond);
     171    IntegerType * const sizeTy = b->getSizeTy();
     172    PHINode * const first = b->CreatePHI(sizeTy, 3);
     173    first->addIncoming(ZERO, entry);
     174    PHINode * const last = b->CreatePHI(sizeTy, 3);
     175
     176    mProcessedInputItems.resize(numOfInputs);
     177    for (unsigned i = 0; i < numOfInputs; ++i) {
     178        if (initialInputItems[i]) {
     179            PHINode * const inputPhi = b->CreatePHI(sizeTy, 3);
     180            inputPhi->addIncoming(initialInputItems[i], entry);
     181            mProcessedInputItems[i] = inputPhi;
     182        } else {
     183            mProcessedInputItems[i] = mProcessedInputItemPtr[i];
     184        }
     185    }
     186
     187    mProducedOutputItems.resize(numOfOutputs);
     188    for (unsigned i = 0; i < numOfOutputs; ++i) {
     189        if (initialOutputItems[i]) {
     190            PHINode * const outputPhi = b->CreatePHI(sizeTy, 3);
     191            outputPhi->addIncoming(initialOutputItems[i], entry);
     192            mProducedOutputItems[i] = outputPhi;
     193        } else {
     194            mProducedOutputItems[i] = mProducedOutputItemPtr[i];
     195        }
     196    }
    123197
    124198    if (LLVM_LIKELY(isa<StreamSet>(mCondition))) {
    125199
    126         Type * const BitBlockTy = b->getBitBlockType();
    127         IntegerType * const sizeTy = b->getSizeTy();
    128 
    129         PHINode * const index = b->CreatePHI(sizeTy, 3);
    130         index->addIncoming(ZERO, entry);
    131         PHINode * const first = b->CreatePHI(sizeTy, 3);
    132         first->addIncoming(ZERO, entry);
     200        last->addIncoming(ZERO, entry);
     201
    133202        PHINode * const state = b->CreatePHI(b->getInt1Ty(), 3);
    134203        state->addIncoming(b->getFalse(), entry);
    135 
    136         const auto numOfInputs = getNumOfStreamInputs() - 1; // the final input is our condition stream
    137         std::vector<PHINode *> inputPhis(numOfInputs);
    138         for (unsigned i = 0; i < numOfInputs; ++i) {
    139             PHINode * const inputPhi = b->CreatePHI(sizeTy, 3);
    140             inputPhi->addIncoming(getAccessibleInputItems(i), entry);
    141             inputPhis[i] = inputPhi;
    142         }
    143 
    144         const auto numOfOutputs = getNumOfStreamOutputs();
    145         std::vector<PHINode *> outputPhis(numOfOutputs);
    146         for (unsigned i = 0; i < numOfOutputs; ++i) {
    147             PHINode * const outputPhi = b->CreatePHI(sizeTy, 3);
    148             outputPhi->addIncoming(getWritableInputItems(i), entry);
    149             outputPhis[i] = outputPhi;
    150         }
    151 
    152204
    153205        BasicBlock * const summarizeOneStride = b->CreateBasicBlock("summarizeOneStride", nonZeroPath);
     
    161213
    162214
    163         Value * const offset = b->CreateMul(index, strideCount);
     215        Value * const offset = b->CreateMul(last, strideCount);
    164216        Value * basePtr = b->getInputStreamBlockPtr(CONDITION_TAG, ZERO, offset);
     217        Type * const BitBlockTy = b->getBitBlockType();
    165218        basePtr = b->CreatePointerCast(basePtr, BitBlockTy->getPointerTo());
    166219        b->CreateBr(summarizeOneStride);
     
    186239        Value * const nextState = b->bitblock_any(merged);
    187240        Value * const sameState = b->CreateICmpEQ(nextState, state);
    188         Value * const firstStride = b->CreateICmpEQ(index, ZERO);
     241        Value * const firstStride = b->CreateICmpEQ(last, ZERO);
    189242        Value * const continuation = b->CreateOr(sameState, firstStride);
    190         Value * const nextIndex = b->CreateAdd(index, ONE);
     243        Value * const nextIndex = b->CreateAdd(last, ONE);
    191244        Value * const notLastStride = b->CreateICmpNE(nextIndex, mNumOfStrides);
    192245        Value * const checkNextStride = b->CreateAnd(continuation, notLastStride);
    193         index->addIncoming(nextIndex, checkStride);
     246        last->addIncoming(nextIndex, checkStride);
    194247        first->addIncoming(first, checkStride);
    195248        state->addIncoming(nextState, checkStride);
     
    198251        // Process every stride between [first, index)
    199252        b->SetInsertPoint(processStrides);
    200 
    201         // build our kernel call
    202         args.reserve(mCurrentMethod->arg_size());
    203         args.push_back(nullptr); // handle
    204         args.push_back(b->CreateSub(index, first)); // numOfStrides
    205         for (unsigned i = 0; i < numOfInputs; i++) {
    206             const StreamSetBuffer * const buffer = mStreamSetInputBuffers[i];
    207             // logical base input address
    208             args.push_back(buffer->getBaseAddress(b.get()));
    209 
    210             // processed input items
    211             const Binding & input = mInputStreamSets[i];
    212             if (isParamAddressable(input)) {
    213                 args.push_back(mProcessedInputItemPtr[i]); // updatable
    214             }  else if (isParamConstant(input)) {
    215                 args.push_back(b->CreateLoad(mProcessedInputItemPtr[i]));  // constant
    216             }
    217 
    218             // accessible input items (after non-deferred processed item count)
    219             args.push_back(sizeTy);
    220 
    221             if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
    222                 args.push_back(b->CreateGEP(mPopCountRateArray[i], first));
    223             }
    224             if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
    225                 args.push_back(b->CreateGEP(mNegatedPopCountRateArray[i], first));
    226             }
    227         }
    228 
    229253        // state is implicitly "indeterminate" during our first stride
    230         Value * const currentState = b->CreateSelect(firstStride, nextState, state);
    231         b->CreateCondBr(currentState, nonZeroPath, allZeroPath);
     254        Value * const selectedPath = b->CreateSelect(firstStride, nextState, state);
     255        b->CreateCondBr(selectedPath, nonZeroPath, allZeroPath);
    232256
    233257    } else {
     258        last->addIncoming(mNumOfStrides, entry);
    234259
    235260        Value * const cond = b->getScalarField(CONDITION_TAG);
    236         const auto n = mCurrentMethod->arg_size();
    237         args.resize(n);
    238         auto arg = mCurrentMethod->arg_begin();
    239         for (unsigned i = 1; i != n; ++i) {
    240             assert (arg != mCurrentMethod->arg_end());
    241             args[i] = *(++arg);
    242         }
    243         assert (args[0] == nullptr);
    244261        b->CreateCondBr(b->CreateIsNotNull(cond), nonZeroPath, allZeroPath);
    245262    }
     
    247264    // make the actual calls and take any potential termination signal
    248265    b->SetInsertPoint(nonZeroPath);
    249     callKernel(b, mTrueKernel, args, terminatedPhi);
     266    callKernel(b, mTrueKernel, first, last, terminatedPhi);
    250267    b->CreateBr(mergePaths);
    251268
    252269    b->SetInsertPoint(allZeroPath);
    253     callKernel(b, mFalseKernel, args, terminatedPhi);
     270    callKernel(b, mFalseKernel, first, last, terminatedPhi);
    254271    b->CreateBr(mergePaths);
    255272
     273
     274    b->SetInsertPoint(mergePaths);
    256275
    257276#endif
  • icGREP/icgrep-devel/icgrep/kernels/optimizationbranch.h

    r6261 r6266  
    4949
    5050    void callKernel(const std::unique_ptr<KernelBuilder> & b,
    51                     const Kernel * const kernel, std::vector<llvm::Value *> & args,
     51                    const Kernel * const kernel, llvm::Value * const first, llvm::Value * const last,
    5252                    llvm::PHINode * const terminatedPhi);
    5353
    5454private:
    5555
    56     Relationship * const    mCondition;
    57     Kernel * const          mTrueKernel;
    58     Kernel * const          mFalseKernel;
     56    Relationship * const        mCondition;
     57    Kernel * const              mTrueKernel;
     58    Kernel * const              mFalseKernel;
     59    std::vector<llvm::Value *>  mProcessedInputItems;
     60    std::vector<llvm::Value *>  mPartialAccessibleInputItems;
    5961
     62    std::vector<llvm::Value *>  mProducedOutputItems;
    6063};
    6164
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

    r6262 r6266  
    2626    mProgressCounter->addIncoming(ZERO, entryBlock);
    2727    mPipelineProgress = b->getFalse();
    28     // any pipeline input streams are considered produced by the P_{in} vertex.
    29     mTerminationGraph[0] = mPipelineKernel->isFinal();
    3028    #ifdef PRINT_DEBUG_MESSAGES
    3129    b->CallPrintInt("+++ pipeline start +++", mSegNo);
     
    153151    b->SetInsertPoint(mKernelTerminated);
    154152    zeroFillPartiallyWrittenOutputStreams(b);
    155     Value * mode = setTerminated(b, mTerminatedExplicitly, TerminatedExplicitly, TerminatedNormally);
    156     updatePhisAfterTermination(b, mode);
     153    setTerminated(b);
     154    updatePhisAfterTermination(b);
    157155    b->CreateBr(mKernelLoopExit);
    158156
     
    162160
    163161    b->SetInsertPoint(mKernelLoopExit);
     162    updateTerminationSignal(mTerminatedPhi);
    164163    writeUpdatedItemCounts(b);
    165164    computeFullyProcessedItemCounts(b);
     
    180179    b->SetInsertPoint(mKernelExit);
    181180    mKernelExit->moveAfter(mKernelLoopExitPhiCatch);
     181    updateTerminationSignal(mTerminatedAtExitPhi);
    182182    writeFinalConsumedItemCounts(b);
    183183    updatePopCountReferenceCounts(b);
     
    189189
    190190/** ------------------------------------------------------------------------------------------------------------- *
    191  * @brief next
     191 * @brief end
    192192 ** ------------------------------------------------------------------------------------------------------------- */
    193193void PipelineCompiler::end(BuilderRef b, const unsigned step) {
     194
     195    // A pipeline will end for one or two reasons:
     196
     197    // 1) No progress can be made by any kernel. This ought to only occur
     198    // if the pipeline itself has I/O streams.
     199
     200    // 2) All pipeline sinks have terminated (i.e., any kernel that writes
     201    // to a pipeline output, is marked as having a side-effect, or produces
     202    // an input for some call in which no dependent kernels is a pipeline
     203    // sink).
     204
    194205    b->setKernel(mPipelineKernel);
    195206
     
    201212    Value * const noProgress = b->CreateICmpEQ(newProgressCounter, TWO);
    202213
    203     const auto pipelineOutput = mLastKernel;
    204 
    205     // check whether every sink has terminated
    206     Value * allTerminated = b->getTrue();
    207     for (const auto e : make_iterator_range(in_edges(pipelineOutput, mTerminationGraph))) {
    208         const auto kernelVertex = source(e, mTerminationGraph);
    209         Value * const kernelState = mTerminationGraph[kernelVertex];
    210         Value * const terminated = b->CreateICmpNE(kernelState, b->getSize(NotTerminated));
    211         allTerminated = b->CreateAnd(allTerminated, terminated);
    212     }
    213 
    214     Value * done = allTerminated;
     214    Value * const terminated = pipelineTerminated(b);
     215    Value * done = terminated;
    215216    if (nestedPipeline()) {
    216         done = b->CreateOr(allTerminated, noProgress);
     217        done = b->CreateOr(terminated, noProgress);
    217218    } else if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    218219        b->CreateAssertZero(noProgress,
     
    235236    b->setKernel(mPipelineKernel);
    236237    if (mPipelineTerminated) {
    237         b->CreateStore(allTerminated, mPipelineTerminated);
     238        b->CreateStore(terminated, mPipelineTerminated);
    238239    }
    239240}
     
    317318    const auto prefix = makeKernelName(mKernelIndex);
    318319    IntegerType * const sizeTy = b->getSizeTy();
     320    IntegerType * const boolTy = b->getInt1Ty();
    319321    mTerminatedPhi = b->CreatePHI(sizeTy, 2, prefix + "_terminated");
    320     mHasProgressedPhi = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_anyProgress");
    321 
     322    mHasProgressedPhi = b->CreatePHI(boolTy, 2, prefix + "_anyProgress");
    322323    const auto numOfInputs = mKernel->getNumOfStreamInputs();
    323324    for (unsigned i = 0; i < numOfInputs; ++i) {
     
    343344    b->SetInsertPoint(mKernelExit);
    344345    const auto prefix = makeKernelName(mKernelIndex);
    345     IntegerType * const sizeTy = b->getSizeTy();
    346 
    347     PHINode * const terminated = b->CreatePHI(sizeTy, 2, prefix + "_terminated");
    348     terminated->addIncoming(mTerminatedInitially, mKernelEntry);
    349     terminated->addIncoming(mTerminatedPhi, mKernelLoopExitPhiCatch);
    350     mTerminationGraph[mKernelIndex] = terminated;
     346    Type * const sizeTy = b->getSizeTy();
     347    mTerminatedAtExitPhi = b->CreatePHI(sizeTy, 2, prefix + "_terminated");
     348    mTerminatedAtExitPhi->addIncoming(mTerminatedInitially, mKernelEntry);
     349    mTerminatedAtExitPhi->addIncoming(mTerminatedPhi, mKernelLoopExitPhiCatch);
    351350
    352351    PHINode * const pipelineProgress = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_pipelineProgress");
     
    403402        }
    404403        mHasProgressedPhi->addIncoming(b->getTrue(), entryBlock);
    405         mTerminatedPhi->addIncoming(b->getSize(NotTerminated), entryBlock);
     404        mTerminatedPhi->addIncoming(mTerminatedInitially, entryBlock);
    406405        b->CreateBr(mKernelLoopExit);
    407406    }
     
    409408
    410409/** ------------------------------------------------------------------------------------------------------------- *
    411  * @brief getInitialTerminationSignal
    412  ** ------------------------------------------------------------------------------------------------------------- */
    413 inline Value * PipelineCompiler::initiallyTerminated(BuilderRef b) {
    414     b->setKernel(mPipelineKernel);
    415     const auto prefix = makeKernelName(mKernelIndex);
    416     mTerminatedInitially = b->getScalarField(prefix + TERMINATION_SIGNAL_SUFFIX);
    417     b->setKernel(mKernel);
    418     return b->CreateICmpNE(mTerminatedInitially, b->getSize(NotTerminated));
    419 }
    420 
    421 /** ------------------------------------------------------------------------------------------------------------- *
    422  * @brief setTerminated
    423  ** ------------------------------------------------------------------------------------------------------------- */
    424 Value * PipelineCompiler::setTerminated(BuilderRef b, Value * const condition, const TerminationMode trueMode, const TerminationMode falseMode) const  {
    425     const auto prefix = makeKernelName(mKernelIndex);
    426     b->setKernel(mPipelineKernel);
    427     ConstantInt * const TRUE_MODE = b->getSize(trueMode);
    428     ConstantInt * const FALSE_MODE = b->getSize(falseMode);
    429     Value * const mode = b->CreateSelect(condition, TRUE_MODE, FALSE_MODE);
    430     b->setScalarField(prefix + TERMINATION_SIGNAL_SUFFIX, mode);
    431     #ifdef PRINT_DEBUG_MESSAGES
    432     b->CallPrintInt("*** " + prefix + "_terminated ***", mode);
    433     #endif
    434     b->setKernel(mKernel);
    435     return mode;
    436 }
    437 
    438 /** ------------------------------------------------------------------------------------------------------------- *
    439410 * @brief updatePhiCountAfterTermination
    440411 ** ------------------------------------------------------------------------------------------------------------- */
    441 inline void PipelineCompiler::updatePhisAfterTermination(BuilderRef b, Value * const terminationMode) {
     412inline void PipelineCompiler::updatePhisAfterTermination(BuilderRef b) {
    442413    BasicBlock * const exitBlock = b->GetInsertBlock();
    443     mTerminatedPhi->addIncoming(terminationMode, exitBlock);
     414    mTerminatedPhi->addIncoming(getTerminationSignal(b, mKernelIndex), exitBlock);
    444415    mHasProgressedPhi->addIncoming(b->getTrue(), exitBlock);
    445416    const auto numOfInputs = mKernel->getNumOfStreamInputs();
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/kernel_logic.hpp

    r6261 r6266  
    6969    BasicBlock * const target = b->CreateBasicBlock(prefix + "_hasInputData", mKernelLoopCall);
    7070    branchToTargetOrLoopExit(b, sufficientInput, target);
    71 }
    72 
    73 /** ------------------------------------------------------------------------------------------------------------- *
    74  * @brief producerTerminated
    75  ** ------------------------------------------------------------------------------------------------------------- */
    76 inline Value * PipelineCompiler::producerTerminated(BuilderRef b, const unsigned inputPort) const {
    77     const auto bufferVertex = getInputBufferVertex(inputPort);
    78     const auto producerVertex = parent(bufferVertex, mBufferGraph);
    79     return b->CreateICmpNE(mTerminationGraph[producerVertex], b->getSize(NotTerminated));
    8071}
    8172
     
    138129    b->CreateLikelyCondBr(cond, target, mKernelLoopExit);
    139130    BasicBlock * const exitBlock = b->GetInsertBlock();
    140     mTerminatedPhi->addIncoming(b->getSize(NotTerminated), exitBlock);
     131    mTerminatedPhi->addIncoming(mTerminatedInitially, exitBlock);
    141132    mHasProgressedPhi->addIncoming(mAlreadyProgressedPhi, exitBlock);
    142133    const auto numOfInputs = mKernel->getNumOfStreamInputs();
     
    823814        Constant * const BLOCK_WIDTH = b->getSize(b->getBitBlockWidth());
    824815        Value * const maskedItemCount = b->CreateAnd(itemCount, ConstantExpr::getNeg(BLOCK_WIDTH));
    825         Value * const reportAll = b->CreateICmpNE(mTerminatedPhi, b->getSize(NotTerminated));
    826         itemCount = b->CreateSelect(reportAll, itemCount, maskedItemCount);
     816        Value * const terminated = hasKernelTerminated(b, mKernelIndex);
     817        itemCount = b->CreateSelect(terminated, itemCount, maskedItemCount);
    827818    }
    828819    return itemCount;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_analysis.hpp

    r6262 r6266  
    33#include <boost/graph/topological_sort.hpp>
    44#include <util/extended_boost_graph_containers.h>
     5#include <boost/graph/topological_sort.hpp>
    56
    67namespace kernel {
     
    127128        const auto d = in_degree(s, G);
    128129        if (d != 0) {
    129             flat_set<Vertex> V;
    130             V.reserve(num_vertices(G) - 2);
     130            dynamic_bitset<> V(num_vertices(G));
    131131            std::queue<Vertex> Q;
    132132            // do a BFS to search for a t-s path
     
    142142                    }
    143143                    assert ("G was not initially acyclic!" && v != s);
    144                     if (LLVM_LIKELY(V.insert(v).second)) {
     144                    if (LLVM_LIKELY(V.test(v))) {
     145                        V.set(v);
    145146                        Q.push(v);
    146147                    }
     
    414415
    415416/** ------------------------------------------------------------------------------------------------------------- *
    416  * @brief makeTerminationGraph
    417  *
    418  * The termination graph is a transitive reduction of I/O dependency graph. It models which kernels to be checked
    419  * to determine whether it is safe to terminate once it has finished processing its current input.
    420  ** ------------------------------------------------------------------------------------------------------------- */
    421 TerminationGraph PipelineCompiler::makeTerminationGraph() const {
    422 
    423     // A pipeline will end for one or two reasons:
    424 
    425     // 1) no progress can be made by any kernel.
    426 
    427     // 2) all pipeline sinks have terminated (i.e., any kernel that writes
    428     // to a pipeline output, is marked as having a side-effect, or produces
    429     // an input for some call).
    430 
    431     using VertexVector = std::vector<TerminationGraph::vertex_descriptor>;
    432 
    433     const auto numOfCalls = mPipelineKernel->getCallBindings().size();
    434     const auto pipelineOutput = mLastKernel;
    435     const auto firstCall = pipelineOutput + 1;
    436     const auto lastCall = firstCall + numOfCalls;
    437 
    438     TerminationGraph G(pipelineOutput + 1);
    439 
    440     // copy and summarize producer -> consumer relations from the buffer graph
    441     for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    442         for (auto buffer : make_iterator_range(out_edges(i, mBufferGraph))) {
    443             const auto bufferVertex = target(buffer, mBufferGraph);
    444             for (auto consumer : make_iterator_range(out_edges(bufferVertex, mBufferGraph))) {
    445                 const auto j = target(consumer, mBufferGraph);
    446                 add_edge(i, j, G);
    447             }
    448         }
    449     }
    450 
    451     // copy and summarize any output scalars of the pipeline or any calls
    452     for (unsigned i = pipelineOutput; i < lastCall; ++i) {
    453         for (auto relationship : make_iterator_range(in_edges(i, mScalarDependencyGraph))) {
    454             const auto relationshipVertex = source(relationship, mScalarDependencyGraph);
    455             for (auto producer : make_iterator_range(in_edges(relationshipVertex, mScalarDependencyGraph))) {
    456                 const auto kernel = source(producer, mScalarDependencyGraph);
    457                 assert ("cannot occur" && kernel != pipelineOutput);
    458                 add_edge(kernel, pipelineOutput, G);
    459             }
    460         }
    461     }
    462 
    463     // create a k_i -> P_out edge for every kernel with a side effect attribute
    464     for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    465         if (LLVM_UNLIKELY(mPipeline[i]->hasAttribute(AttrId::SideEffecting))) {
    466             add_edge(i, pipelineOutput, G);
    467         }
    468     }
    469 
    470     // generate a transitive closure
    471     VertexVector ordering;
    472     ordering.reserve(num_vertices(G));
    473     topological_sort(G, std::back_inserter(ordering));
    474 
    475     for (unsigned u : ordering) {
    476         for (auto e : make_iterator_range(in_edges(u, G))) {
    477             const auto s = source(e, G);
    478             for (auto f : make_iterator_range(out_edges(u, G))) {
    479                 add_edge(s, target(f, G), G);
    480             }
    481         }
    482     }
    483 
    484     // then take the transitive reduction
    485     VertexVector sources;
    486     for (unsigned u = pipelineOutput; u--; ) {
    487         for (auto e : make_iterator_range(in_edges(u, G))) {
    488             sources.push_back(source(e, G));
    489         }
    490         std::sort(sources.begin(), sources.end());
    491         for (auto e : make_iterator_range(out_edges(u, G))) {
    492             remove_in_edge_if(target(e, G), [&G, &sources](const TerminationGraph::edge_descriptor f) {
    493                 return std::binary_search(sources.begin(), sources.end(), source(f, G));
    494             }, G);
    495         }
    496         sources.clear();
    497     }
    498 
    499     // TODO: Compute the minimum vertex-disjoint path cover through G, where we consider only
    500     // kernel nodes node and any kernel that could terminate has its in-edges removed. The
    501     // resulting set of paths will be close to the minimum number of bits required to encode
    502     // kernel termination in the pipeline. The complication is when an edge could be added to
    503     // multiple paths but adding it would increase the ceil(log2(path length)) cost of one but
    504     // not the other(s). Considering this, the result will require the minimum number of bits.
    505     // Ignoring this, we can apply Kőnig's theorem to solve this in polynomial time.
    506 
    507     return G;
    508 }
    509 
    510 /** ------------------------------------------------------------------------------------------------------------- *
    511417 * @brief makePopCountGraph
    512418 *
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_compiler.hpp

    r6262 r6266  
    99#include <boost/container/flat_map.hpp>
    1010#include <boost/graph/adjacency_list.hpp>
    11 #include <boost/graph/topological_sort.hpp>
     11#include <boost/graph/adjacency_matrix.hpp>
    1212#include <boost/range/adaptor/reversed.hpp>
    1313//#include <boost/serialization/strong_typedef.hpp>
    1414#include <boost/math/common_factor_rt.hpp>
     15#include <boost/dynamic_bitset.hpp>
    1516#include <llvm/IR/Module.h>
    1617#include <llvm/Support/raw_ostream.h>
     
    105106using KernelMap = flat_map<const Kernel *, Value>;
    106107
    107 using TerminationGraph = adjacency_list<hash_setS, vecS, bidirectionalS, Value *>;
     108using TerminationGraph = adjacency_list<hash_setS, vecS, bidirectionalS, unsigned, unsigned>;
    108109
    109110using ScalarDependencyGraph = adjacency_list<vecS, vecS, bidirectionalS, Value *, unsigned>;
     
    154155using PopCountGraph = adjacency_list<vecS, vecS, bidirectionalS, no_property, PopCountEdge>;
    155156
    156 enum TerminationMode : unsigned {
    157     NotTerminated = 0
    158     , TerminatedNormally = 1
    159     , TerminatedExplicitly = 2
    160 };
    161 
    162157const static std::string LOGICAL_SEGMENT_SUFFIX = ".LSN";
    163 const static std::string TERMINATION_SIGNAL_SUFFIX = ".TERM";
     158const static std::string TERMINATION_PREFIX = "@TERM";
    164159const static std::string ITEM_COUNT_SUFFIX = ".IC";
    165160const static std::string DEFERRED_ITEM_COUNT_SUFFIX = ".ICD";
     
    205200    void deallocateThreadLocalState(BuilderRef b, Value * const localState);
    206201
     202    void addTerminationProperties(BuilderRef b);
     203
    207204// inter-kernel functions
    208205
     
    235232    void computeFullyProducedItemCounts(BuilderRef b);
    236233
    237     void updatePhisAfterTermination(BuilderRef b, Value * const terminationMode);
     234    void updatePhisAfterTermination(BuilderRef b);
    238235
    239236    void zeroFillPartiallyWrittenOutputStreams(BuilderRef b);
     
    270267    Value * truncateBlockSize(BuilderRef b, const Binding & binding, Value * itemCount) const;
    271268    Value * getTotalItemCount(BuilderRef b, const unsigned inputPort) const;
     269    void resetMemoizedFields();
     270
     271// termination functions
     272
     273    Value * hasKernelTerminated(BuilderRef b, const unsigned kernel) const;
     274
     275    LLVM_READNONE Constant * getTerminationSignal(BuilderRef b, const unsigned kernel) const;
     276
    272277    Value * producerTerminated(BuilderRef b, const unsigned inputPort) const;
    273278    Value * initiallyTerminated(BuilderRef b);
    274     Value * setTerminated(BuilderRef b, Value * const condition, const TerminationMode trueMode, const TerminationMode falseMode) const;
    275     void resetMemoizedFields();
     279    void setTerminated(BuilderRef b) const;
     280    Value * pipelineTerminated(BuilderRef b) const;
     281    void updateTerminationSignal(Value * const signal);
    276282
    277283// pop-count functions
     
    362368
    363369    std::vector<unsigned> lexicalOrderingOfStreamIO() const;
    364     TerminationGraph makeTerminationGraph() const;
     370    TerminationGraph makeTerminationGraph();
    365371    ScalarDependencyGraph makeScalarDependencyGraph() const;
    366372
     
    444450    PHINode *                                   mAlreadyProgressedPhi = nullptr;
    445451    PHINode *                                   mTerminatedPhi = nullptr;
     452    PHINode *                                   mTerminatedAtExitPhi = nullptr;
    446453    Value *                                     mNumOfLinearStrides = nullptr;
    447454    Value *                                     mTerminatedExplicitly = nullptr;
    448455    std::vector<unsigned>                       mPortOrdering;
     456
     457    std::vector<Value *>                        mTerminationSignals;
    449458
    450459    std::vector<Value *>                        mInitiallyProcessedItemCount; // *before* entering the kernel
     
    488497    ConsumerGraph                               mConsumerGraph;
    489498    ScalarDependencyGraph                       mScalarDependencyGraph;
    490     TerminationGraph                            mTerminationGraph;
     499    const TerminationGraph                      mTerminationGraph;
    491500    PopCountGraph                               mPopCountGraph;
    492501
    493502};
    494503
    495 Kernels makePipelineList(PipelineKernel * const pk) {
     504/** ------------------------------------------------------------------------------------------------------------- *
     505 * @brief makePipelineList
     506 ** ------------------------------------------------------------------------------------------------------------- */
     507inline Kernels makePipelineList(PipelineKernel * const pk) {
    496508    const Kernels & P = pk->getKernels();
    497509    const auto n = P.size();
     
    688700}
    689701
    690 
    691 
    692 
    693 
    694702} // end of namespace
    695703
     704#include "pipeline_analysis.hpp"
     705#include "buffer_management_logic.hpp"
     706#include "termination_logic.hpp"
     707#include "consumer_logic.hpp"
     708#include "core_logic.hpp"
     709#include "kernel_logic.hpp"
     710#include "cycle_counter_logic.hpp"
     711#include "popcount_logic.hpp"
     712#include "pipeline_logic.hpp"
     713
     714
    696715#endif // PIPELINE_COMPILER_HPP
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_kernel.cpp

    r6262 r6266  
    22#include <kernels/relationship.h>
    33#include "pipeline_compiler.hpp"
    4 #include "pipeline_analysis.hpp"
    5 #include "buffer_management_logic.hpp"
    6 #include "consumer_logic.hpp"
    7 #include "core_logic.hpp"
    8 #include "kernel_logic.hpp"
    9 #include "cycle_counter_logic.hpp"
    10 #include "popcount_logic.hpp"
    11 #include "pipeline_logic.hpp"
    124#include <llvm/IR/Function.h>
    135
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_logic.hpp

    r6263 r6266  
    3939inline void PipelineCompiler::addPipelineKernelProperties(BuilderRef b) {
    4040    b->setKernel(mPipelineKernel);
     41    addTerminationProperties(b);
    4142    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    4243        addBufferHandlesToPipelineKernel(b, i);
     
    5657
    5758    const auto name = makeKernelName(kernelIndex);
    58     // TODO: prove two termination signals can be fused into a single counter?
    59     mPipelineKernel->addInternalScalar(sizeTy, name + TERMINATION_SIGNAL_SUFFIX);
     59
     60//    mPipelineKernel->addInternalScalar(sizeTy, name + TERMINATION_SIGNAL_SUFFIX);
    6061    mPipelineKernel->addInternalScalar(sizeTy, name + LOGICAL_SEGMENT_SUFFIX);
    6162
     
    111112        b->setKernel(mKernel);
    112113        Value * const terminatedOnInit = b->CreateCall(getInitializationFunction(b), args);
    113         if (mKernel->canSetTerminateSignal()) {
    114             setTerminated(b, terminatedOnInit, TerminatedExplicitly, NotTerminated);
    115         }
     114
     115        const auto prefix = makeKernelName(mKernelIndex);
     116        BasicBlock * const kernelTerminated = b->CreateBasicBlock(prefix + "_terminatedOnInit");
     117        BasicBlock * const kernelExit = b->CreateBasicBlock(prefix + "_exit");
     118        b->CreateUnlikelyCondBr(terminatedOnInit, kernelTerminated, kernelExit);
     119
     120        b->SetInsertPoint(kernelTerminated);
     121        setTerminated(b);
     122        b->CreateBr(kernelExit);
     123
     124        b->SetInsertPoint(kernelExit);
    116125    }
    117126}
     
    286295                const Binding & input = mPipelineKernel->getInputScalarBinding(j);
    287296                const Relationship * const rel = getRelationship(input);
    288                 if (isa<ScalarConstant>(rel)) {
     297                if (LLVM_UNLIKELY(isa<ScalarConstant>(rel))) {
    289298                    value = cast<ScalarConstant>(rel)->value();
    290299                } else {
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/popcount_logic.hpp

    r6261 r6266  
    5656        // If this is the producer's final stride, round the index position up
    5757        // to account for a partial stride.
    58         Value * const terminated = b->CreateICmpNE(mTerminatedPhi, b->getSize(NotTerminated));
     58        Value * const terminated = hasKernelTerminated(b, mKernelIndex);
    5959        Value * const rounding = b->CreateSelect(terminated, BLOCK_SIZE_MINUS_1, ZERO);
    6060        Value * const endIndex = b->CreateLShr(b->CreateAdd(produced, rounding), LOG2_BLOCK_WIDTH);
  • icGREP/icgrep-devel/icgrep/re/re_seq.h

    r6223 r6266  
    4545        RE * const item = *i;
    4646        if (LLVM_UNLIKELY(isEmptySet(item))) {
    47             return makeEmptySet();
     47            return item;
    4848        } else if (LLVM_UNLIKELY(llvm::isa<Seq>(item))) {
    4949            for (RE * const innerItem : *llvm::cast<Seq>(item)) {
     
    6767    return llvm::isa<Seq>(s) && llvm::cast<Seq>(s)->empty();
    6868}
    69    
     69
    7070inline RE * u32string2re(std::u32string s) {
    7171    std::vector<RE *> ccs;
     
    7575    return makeSeq(ccs.begin(), ccs.end());
    7676}
    77    
     77
    7878}
    7979
Note: See TracChangeset for help on using the changeset viewer.