Changeset 6262


Ignore:
Timestamp:
Jan 1, 2019, 3:26:50 PM (3 months ago)
Author:
nmedfort
Message:

Termination detection for single/multi-threaded pipelines

Location:
icGREP/icgrep-devel/icgrep/kernels/pipeline
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

    r6261 r6262  
    2020    IntegerType * const sizeTy = b->getSizeTy();
    2121    ConstantInt * const ZERO = b->getSize(0);
    22     ConstantInt * const NOT_TERMINATED = b->getSize(NotTerminated);
    2322
    2423    mSegNo = b->CreatePHI(sizeTy, 2, "segNo");
     
    2928    // any pipeline input streams are considered produced by the P_{in} vertex.
    3029    mTerminationGraph[0] = mPipelineKernel->isFinal();
    31 
    32     mPipelineTerminated = NOT_TERMINATED;
    3330    #ifdef PRINT_DEBUG_MESSAGES
    3431    b->CallPrintInt("+++ pipeline start +++", mSegNo);
     
    237234    mSegNo = nullptr;
    238235    b->setKernel(mPipelineKernel);
    239 
    240 // TODO: not correct for threaded pipelines
    241 //    if (mPipelineKernel->canSetTerminateSignal()) {
    242 //        Value * const terminatedPtr = mPipelineKernel->getTerminationSignalPtr();
    243 //        b->CreateStore(allTerminated, terminatedPtr);
    244 //    }
     236    if (mPipelineTerminated) {
     237        b->CreateStore(allTerminated, mPipelineTerminated);
     238    }
    245239}
    246240
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_analysis.hpp

    r6261 r6262  
    454454            const auto relationshipVertex = source(relationship, mScalarDependencyGraph);
    455455            for (auto producer : make_iterator_range(in_edges(relationshipVertex, mScalarDependencyGraph))) {
    456                 const auto j = source(producer, mScalarDependencyGraph);
    457                 add_edge(j, pipelineOutput, G);
     456                const auto kernel = source(producer, mScalarDependencyGraph);
     457                assert ("cannot occur" && kernel != pipelineOutput);
     458                add_edge(kernel, pipelineOutput, G);
    458459            }
    459460        }
     
    495496        sources.clear();
    496497    }
     498
     499    // TODO: Compute the minimum vertex-disjoint path cover through G, where we consider only
     500    // kernel nodes and any kernel that could terminate has its in-edges removed. The
     501    // resulting set of paths will be close to the minimum number of bits required to encode
     502    // kernel termination in the pipeline. The complication is when an edge could be added to
     503    // multiple paths but adding it would increase the ceil(log2(path length)) cost of one but
     504    // not the other(s). Considering this, the result will require the minimum number of bits.
     505    // Ignoring this, we can apply Kőnig's theorem to solve this in polynomial time.
    497506
    498507    return G;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_compiler.hpp

    r6261 r6262  
    174174    void generateInitializeMethod(BuilderRef b);
    175175    void generateSingleThreadKernelMethod(BuilderRef b);
    176     void generateMultiThreadKernelMethod(BuilderRef b, const unsigned numOfThreads);
     176    void generateMultiThreadKernelMethod(BuilderRef b);
    177177    void generateFinalizeMethod(BuilderRef b);
    178178    std::vector<Value *> getFinalOutputScalars(BuilderRef b);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_kernel.cpp

    r6261 r6262  
    8888            mCompiler->generateSingleThreadKernelMethod(b);
    8989        } else {
    90             mCompiler->generateMultiThreadKernelMethod(b, mNumOfThreads);
     90            mCompiler->generateMultiThreadKernelMethod(b);
    9191        }
    9292    }
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_logic.hpp

    r6261 r6262  
    100100        setActiveKernel(b, i);
    101101        args.resize(in_degree(i, mScalarDependencyGraph) + 1);
    102         #ifndef NDEBUG
    103         std::fill(args.begin(), args.end(), nullptr);
    104         #endif
    105102        args[0] = mKernel->getHandle();
    106103        b->setKernel(mPipelineKernel);
     
    110107            const auto k = mScalarDependencyGraph[pe];
    111108            const Binding & input = mPipelineKernel->getInputScalarBinding(k);
    112             assert (args[j] == nullptr);
    113109            args[j] = b->getScalarField(input.getName());
    114110        }
     
    125121 ** ------------------------------------------------------------------------------------------------------------- */
    126122void PipelineCompiler::generateSingleThreadKernelMethod(BuilderRef b) {
    127 
    128123    StructType * const localStateType = getLocalStateType(b);
    129124    Value * const localState = makeStateObject(b, localStateType);
     
    150145 * fashion such that processing of segment S_i by the full pipeline is carried out by thread i mod T.
    151146 ** ------------------------------------------------------------------------------------------------------------- */
    152 void PipelineCompiler::generateMultiThreadKernelMethod(BuilderRef b, const unsigned numOfThreads) {
     147void PipelineCompiler::generateMultiThreadKernelMethod(BuilderRef b) {
     148
     149    const auto numOfThreads = mPipelineKernel->getNumOfThreads();
    153150
    154151    assert (numOfThreads > 1);
     
    351348}
    352349
    353 enum : int {
     350enum : unsigned {
    354351    HANDLE_INDEX = 0
    355352    , SEGMENT_OFFSET_INDEX = 1
    356353    , LOCAL_STATE_INDEX = 2
    357     , FIRST_STREAM_INDEX = 3
     354    , FIRST_INPUT_STREAM_INDEX = 3
    358355};
    359356
     
    402399    const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
    403400    for (unsigned i = 0; i < numOfInputs; ++i) {
    404         auto buffer = mPipelineKernel->getInputStreamSetBuffer(i);
     401        const auto buffer = mPipelineKernel->getInputStreamSetBuffer(i);
    405402        Value * const handle = buffer->getHandle();
    406         indices[1] = b->getInt32(i + FIRST_STREAM_INDEX);
     403        indices[1] = b->getInt32(FIRST_INPUT_STREAM_INDEX + i);
    407404        b->CreateStore(handle, b->CreateGEP(threadState, indices));
    408405    }
     406
     407    const auto FIRST_OUTPUT_STREAM_INDEX = FIRST_INPUT_STREAM_INDEX + numOfInputs;
    409408    const auto numOfOutputs = mPipelineKernel->getNumOfStreamOutputs();
    410409    for (unsigned i = 0; i < numOfOutputs; ++i) {
    411         auto buffer = mPipelineKernel->getOutputStreamSetBuffer(i);
     410        const auto buffer = mPipelineKernel->getOutputStreamSetBuffer(i);
    412411        Value * const handle = buffer->getHandle();
    413         indices[1] = b->getInt32(i + numOfInputs + FIRST_STREAM_INDEX);
     412        indices[1] = b->getInt32(FIRST_OUTPUT_STREAM_INDEX + i);
    414413        b->CreateStore(handle, b->CreateGEP(threadState, indices));
    415414    }
     
    434433
    435434    indices[1] = b->getInt32(LOCAL_STATE_INDEX);
    436 
    437435    setThreadLocalState(b, b->CreateGEP(threadState, indices));
     436
    438437    const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
    439438    for (unsigned i = 0; i < numOfInputs; ++i) {
    440         indices[1] = b->getInt32(i + FIRST_STREAM_INDEX);
     439        indices[1] = b->getInt32(FIRST_INPUT_STREAM_INDEX + i);
    441440        Value * streamHandle = b->CreateLoad(b->CreateGEP(threadState, indices));
    442441        auto buffer = mPipelineKernel->getInputStreamSetBuffer(i);
    443442        buffer->setHandle(b, streamHandle);
    444443    }
     444    const auto FIRST_OUTPUT_STREAM_INDEX = FIRST_INPUT_STREAM_INDEX + numOfInputs;
    445445    const auto numOfOutputs = mPipelineKernel->getNumOfStreamOutputs();
    446446    for (unsigned i = 0; i < numOfOutputs; ++i) {
    447         indices[1] = b->getInt32(i + numOfInputs + FIRST_STREAM_INDEX);
     447        indices[1] = b->getInt32(FIRST_OUTPUT_STREAM_INDEX + i);
    448448        Value * streamHandle = b->CreateLoad(b->CreateGEP(threadState, indices));
    449449        auto buffer = mPipelineKernel->getOutputStreamSetBuffer(i);
     
    465465}
    466466
    467 enum : int {
     467enum : unsigned {
    468468    POP_COUNT_STRUCT_INDEX = 0
     469    , TERMINATION_SIGNAL_INDEX = 1
    469470};
    470471
     
    473474 ** ------------------------------------------------------------------------------------------------------------- */
    474475inline StructType * PipelineCompiler::getLocalStateType(BuilderRef b) {
    475     StructType * const popCountTy = getPopCountThreadLocalStateType(b);
    476     return StructType::get(popCountTy, nullptr);
     476    std::vector<Type *> fields(2);
     477    fields[POP_COUNT_STRUCT_INDEX] = getPopCountThreadLocalStateType(b);
     478    if (mPipelineKernel->getNumOfThreads() != 1 && mPipelineKernel->canSetTerminateSignal()) {
     479        fields[TERMINATION_SIGNAL_INDEX] = b->getInt1Ty();
     480    } else {
     481        fields[TERMINATION_SIGNAL_INDEX] = StructType::get(b->getContext());
     482    }
     483    return StructType::get(b->getContext(), fields);
    477484}
    478485
     
    497504    mPopCountState = b->CreateGEP(localState, indices);
    498505    assert (mPopCountState->getType()->getPointerElementType() == getPopCountThreadLocalStateType(b));
     506    if (mPipelineKernel->canSetTerminateSignal()) {
     507        if (mPipelineKernel->getNumOfThreads() != 1) {
     508            indices[1] = b->getInt32(TERMINATION_SIGNAL_INDEX);
     509            mPipelineTerminated = b->CreateGEP(localState, indices);
     510        } else {
     511            mPipelineTerminated = mPipelineKernel->getTerminationSignalPtr();
     512        }
     513    }
    499514}
    500515
     
    508523    assert (localState->getType()->getPointerElementType() == getLocalStateType(b));
    509524    deallocatePopCountArrays(b, b->CreateGEP(localState, indices));
     525    if (mPipelineTerminated && mPipelineKernel->getNumOfThreads() != 1) {
     526        indices[1] = b->getInt32(TERMINATION_SIGNAL_INDEX);
     527        Value * terminated = b->CreateLoad(b->CreateGEP(localState, indices));
     528        Value * terminatedPtr = mPipelineKernel->getTerminationSignalPtr();
     529        terminated = b->CreateOr(b->CreateLoad(terminatedPtr), terminated);
     530        b->CreateStore(terminated, terminatedPtr);
     531    }
    510532}
    511533
Note: See TracChangeset for help on using the changeset viewer.