// Source: icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp @ r6237
//
// Last change on this file since r6237 was r6237, checked in by nmedfort, 5 months ago:
//
//   Re-enabled segment pipeline parallelism; moved logical segment number into pipeline kernel.
//
// File size: 27.8 KB
// (Captured from a Trac browser listing that displayed "Rev" and "Line" gutter columns.)
[6228]1#include "pipeline_compiler.hpp"
[6184]2
// Suffix appended to a kernel's name to form the pipeline-kernel scalar that holds its termination flag.
static const std::string TERMINATION_SIGNAL = "terminationSignal";
4
[6184]5namespace kernel {
6
7/** ------------------------------------------------------------------------------------------------------------- *
[6237]8 * @brief addPipelineKernelProperties
[6228]9 ** ------------------------------------------------------------------------------------------------------------- */
[6237]10inline void PipelineCompiler::addPipelineKernelProperties(BuilderRef b) {
[6228]11    initializePopCounts();
12    const auto numOfKernels = mPipeline.size();
13    b->setKernel(mPipelineKernel);
14    for (unsigned i = 0; i < numOfKernels; ++i) {
[6237]15        addInternalKernelProperties(b, i);
[6228]16        addBufferHandlesToPipelineKernel(b, i);
17        addPopCountScalarsToPipelineKernel(b, i);
18    }
[6233]19    b->setKernel(mPipelineKernel);
[6228]20}
21
// const static std::string PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";
// const static std::string PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";
// const static std::string NON_DEFERRED_ITEM_COUNT_SUFFIX = "_nonDeferredItemCount";
// const static std::string LOGICAL_SEGMENT_NO_SCALAR = "segmentNo";
26
[6228]27/** ------------------------------------------------------------------------------------------------------------- *
[6237]28 * @brief addInternalKernelProperties
29 ** ------------------------------------------------------------------------------------------------------------- */
30inline void PipelineCompiler::addInternalKernelProperties(BuilderRef b, const unsigned kernelIndex) {
31//    Kernel * const kernel = mPipeline[kernelIndex];
32    IntegerType * const sizeTy = b->getSizeTy();
33
34    const auto name = makeKernelName(kernelIndex);
35    // TODO: prove two termination signals can be fused into a single counter?
36    mPipelineKernel->addInternalScalar(b->getInt1Ty(), name + TERMINATION_SIGNAL);
37    // TODO: non deferred item count for fixed rates could be calculated from seg no.
38    mPipelineKernel->addInternalScalar(sizeTy, name + LOGICAL_SEGMENT_NO_SCALAR);
39
40//    const auto numOfInputs = kernel->getNumOfStreamInputs();
41//    for (unsigned i = 0; i < numOfInputs; i++) {
42//        const Binding & input = kernel->getInputStreamSetBinding(i);
43//        const auto prefix = makeBufferName(kernelIndex, input);
44//        mPipelineKernel->addInternalScalar(sizeTy, prefix + PROCESSED_ITEM_COUNT_SUFFIX);
45//        if (input.isDeferred()) {
46//            mPipelineKernel->addInternalScalar(sizeTy, prefix + NON_DEFERRED_ITEM_COUNT_SUFFIX);
47//        }
48//    }
49
50//    const auto numOfOutputs = kernel->getNumOfStreamOutputs();
51//    for (unsigned i = 0; i < numOfOutputs; i++) {
52//        const Binding & output = kernel->getOutputStreamSetBinding(i);
53//        const auto prefix = makeBufferName(kernelIndex, output);
54//        mPipelineKernel->addInternalScalar(sizeTy, prefix + PRODUCED_ITEM_COUNT_SUFFIX);
55//        if (output.isDeferred()) {
56//            mPipelineKernel->addInternalScalar(sizeTy, prefix + NON_DEFERRED_ITEM_COUNT_SUFFIX);
57//        }
58//    }
59
60}
61
62/** ------------------------------------------------------------------------------------------------------------- *
[6184]63 * @brief generateInitializeMethod
64 ** ------------------------------------------------------------------------------------------------------------- */
65void PipelineCompiler::generateInitializeMethod(BuilderRef b) {
66    const auto numOfKernels = mPipeline.size();
67    for (unsigned i = 0; i < numOfKernels; ++i) {
68        mPipeline[i]->addKernelDeclarations(b);
69    }
70    for (unsigned i = 0; i < numOfKernels; ++i) {
71        Kernel * const kernel = mPipeline[i];
72        if (!kernel->hasFamilyName()) {
73            Value * const handle = kernel->createInstance(b);
74            b->setScalarField(makeKernelName(i), handle);
75        }
76    }
77    constructBuffers(b);
78    std::vector<Value *> args;
79    for (unsigned i = 0; i < numOfKernels; ++i) {
80        setActiveKernel(b, i);
81        args.resize(in_degree(i, mScalarDependencyGraph) + 1);
82        #ifndef NDEBUG
83        std::fill(args.begin(), args.end(), nullptr);
84        #endif
85        args[0] = mKernel->getHandle();
86        b->setKernel(mPipelineKernel);
87        for (const auto ce : make_iterator_range(in_edges(i, mScalarDependencyGraph))) {
88            const auto j = mScalarDependencyGraph[ce] + 1;
89            const auto pe = in_edge(source(ce, mScalarDependencyGraph), mScalarDependencyGraph);
90            const auto k = mScalarDependencyGraph[pe];
91            const Binding & input = mPipelineKernel->getInputScalarBinding(k);
92            assert (args[j] == nullptr);
93            args[j] = b->getScalarField(input.getName());
94        }
95        b->setKernel(mKernel);
[6233]96        Value * const terminatedOnInit = b->CreateCall(getInitializationFunction(b), args);
97        if (mKernel->canSetTerminateSignal()) {
98            setTerminated(b, terminatedOnInit);
99        }
[6184]100    }
101}
102
103/** ------------------------------------------------------------------------------------------------------------- *
104 * @brief start
105 ** ------------------------------------------------------------------------------------------------------------- */
106void PipelineCompiler::start(BuilderRef b, Value * const initialSegNo) {
107
108    // Create the basic blocks for the loop.
109    BasicBlock * const entryBlock = b->GetInsertBlock();
110    mPipelineLoop = b->CreateBasicBlock("pipelineLoop");
111    mPipelineEnd = b->CreateBasicBlock("pipelineEnd");
112
113    mKernel = nullptr;
114    mKernelIndex = 0;
115    b->CreateBr(mPipelineLoop);
116
117    b->SetInsertPoint(mPipelineLoop);
118    mSegNo = b->CreatePHI(b->getSizeTy(), 2, "segNo");
119    mSegNo->addIncoming(initialSegNo, entryBlock);
120    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
121        mDeadLockCounter = b->CreatePHI(b->getSizeTy(), 2, "deadLockCounter");
122        mDeadLockCounter->addIncoming(b->getSize(0), entryBlock);
123        mPipelineProgress = b->getFalse();
124    }
[6228]125    #ifdef PRINT_DEBUG_MESSAGES
126    b->CallPrintInt("+++ pipeline start +++", mSegNo);
127    #endif
[6184]128    startOptionalCycleCounter(b);
129}
130
131/** ------------------------------------------------------------------------------------------------------------- *
132 * @brief executeKernel
133 ** ------------------------------------------------------------------------------------------------------------- */
134void PipelineCompiler::executeKernel(BuilderRef b) {
135
[6228]136    resetMemoizedFields();
137    mPortOrdering = lexicalOrderingOfStreamIO();
[6184]138    loadBufferHandles(b);
139
140    mKernelEntry = b->GetInsertBlock();
141
142    const auto kernelName = makeKernelName(mKernelIndex);
143    BasicBlock * const checkProducers = b->CreateBasicBlock(kernelName + "_checkProducers", mPipelineEnd);
144    mKernelLoopEntry = b->CreateBasicBlock(kernelName + "_loopEntry", mPipelineEnd);
145    mKernelLoopCall = b->CreateBasicBlock(kernelName + "_executeKernel", mPipelineEnd);
146    mKernelLoopExit = b->CreateBasicBlock(kernelName + "_loopExit", mPipelineEnd);
147    mKernelExit = b->CreateBasicBlock(kernelName + "_kernelExit", mPipelineEnd);
148    // The phi catch simplifies compilation logic by "forward declaring" the loop exit point.
149    // Subsequent optimization phases will collapse it into the correct exit block.
150    mKernelLoopExitPhiCatch = b->CreateBasicBlock(kernelName + "_kernelExitPhiCatch", mPipelineEnd);
151
152    /// -------------------------------------------------------------------------------------
153    /// KERNEL ENTRY
154    /// -------------------------------------------------------------------------------------
[6233]155    Value * const initiallyTerminated = getInitialTerminationSignal(b);
[6228]156    #ifdef PRINT_DEBUG_MESSAGES
157    if (1) {
158    Constant * const MAX_INT = ConstantInt::getAllOnesValue(mSegNo->getType());
[6233]159    Value * const round = b->CreateSelect(initiallyTerminated, MAX_INT, mSegNo);
[6228]160    b->CallPrintInt("--- " + kernelName + "_start ---", round);
161    }
162    #endif
[6233]163    b->CreateUnlikelyCondBr(initiallyTerminated, mKernelExit, checkProducers);
[6184]164
165    /// -------------------------------------------------------------------------------------
166    /// KERNEL CHECK PRODUCERS
167    /// -------------------------------------------------------------------------------------
168
169    b->SetInsertPoint(checkProducers);
[6228]170    readInitialProducedItemCounts(b);
[6184]171    b->CreateBr(mKernelLoopEntry);
172
173    // Set up some PHI nodes "early" to simplify accumulating their incoming values.
174
175    /// -------------------------------------------------------------------------------------
176    /// KERNEL LOOP ENTRY
177    /// -------------------------------------------------------------------------------------
178
179    b->SetInsertPoint(mKernelLoopEntry);
180    // Since we may loop and call the kernel again, we want to mark that we've progressed
181    // if we execute any kernel even if we could not complete a full segment.
182    if (mPipelineProgress) {
183        mAlreadyProgressedPhi = b->CreatePHI(b->getInt1Ty(), 2, kernelName + "_madeProgress");
184        mAlreadyProgressedPhi->addIncoming(mPipelineProgress, checkProducers);
185    }
186
187    /// -------------------------------------------------------------------------------------
188    /// KERNEL CALL
189    /// -------------------------------------------------------------------------------------
190
191    b->SetInsertPoint(mKernelLoopCall);
192    initializeKernelCallPhis(b);
193
194    /// -------------------------------------------------------------------------------------
195    /// KERNEL LOOP EXIT
196    /// -------------------------------------------------------------------------------------
197
198    b->SetInsertPoint(mKernelLoopExit);
199    mTerminatedPhi = b->CreatePHI(b->getInt1Ty(), 2, kernelName + "_terminated");
200    if (mPipelineProgress) {
201        mHasProgressedPhi = b->CreatePHI(b->getInt1Ty(), 2, kernelName + "_anyProgress");
202    }
203
204    /// -------------------------------------------------------------------------------------
205    /// KERNEL EXIT
206    /// -------------------------------------------------------------------------------------
207
208    b->SetInsertPoint(mKernelExit);
209    initializeKernelExitPhis(b);
210
211    /// -------------------------------------------------------------------------------------
212    /// KERNEL LOOP ENTRY (CONTINUED)
213    /// -------------------------------------------------------------------------------------
214
215    b->SetInsertPoint(mKernelLoopEntry);
[6228]216    checkForSufficientInputDataAndOutputSpace(b);
217    determineNumOfLinearStrides(b);
218
219    Value * isFinal = nullptr;
220
[6184]221    ConstantInt * const ZERO = b->getSize(0);
222
[6228]223    if (mNumOfLinearStrides) {
[6184]224
225        BasicBlock * const enteringNonFinalSegment = b->CreateBasicBlock(kernelName + "_nonFinalSegment", mKernelLoopCall);
226        BasicBlock * const enteringFinalStride = b->CreateBasicBlock(kernelName + "_finalStride", mKernelLoopCall);
227
[6228]228        isFinal = b->CreateICmpEQ(mNumOfLinearStrides, ZERO);
[6184]229
[6228]230        b->CreateUnlikelyCondBr(isFinal, enteringFinalStride, enteringNonFinalSegment);
[6184]231
232        /// -------------------------------------------------------------------------------------
233        /// KERNEL ENTERING FINAL STRIDE
234        /// -------------------------------------------------------------------------------------
235
236        b->SetInsertPoint(enteringFinalStride);
237        calculateFinalItemCounts(b);
238        b->CreateBr(mKernelLoopCall);
239
[6228]240        /// -------------------------------------------------------------------------------------
241        /// KERNEL ENTERING NON-FINAL SEGMENT
242        /// -------------------------------------------------------------------------------------
[6184]243
[6228]244        b->SetInsertPoint(enteringNonFinalSegment);
245        calculateNonFinalItemCounts(b);
[6184]246        b->CreateBr(mKernelLoopCall);
[6228]247
248    } else {
249        mNumOfLinearStrides = ZERO;
250        b->CreateBr(mKernelLoopCall);
[6184]251    }
252
253    /// -------------------------------------------------------------------------------------
254    /// KERNEL CALL (CONTINUED)
255    /// -------------------------------------------------------------------------------------
256
257    b->SetInsertPoint(mKernelLoopCall);
[6228]258    expandOutputBuffers(b);
[6184]259    writeKernelCall(b);
[6228]260
261    BasicBlock * const incrementItemCounts = b->CreateBasicBlock(kernelName + "_incrementItemCounts", mKernelLoopExit);
262    BasicBlock * const terminationCheck = b->CreateBasicBlock(kernelName + "_normalTerminationCheck", mKernelLoopExit);
263    BasicBlock * const terminated = b->CreateBasicBlock(kernelName + "_terminated", mKernelLoopExit);
264
265    // If the kernel itself terminates, it must set the final processed/produced item counts.
266    // Otherwise, the pipeline will update any countable rates, even upon termination.
[6233]267    b->CreateUnlikelyCondBr(mTerminationExplicitly, terminated, incrementItemCounts);
[6228]268
269    /// -------------------------------------------------------------------------------------
270    /// KERNEL INCREMENT ITEM COUNTS
271    /// -------------------------------------------------------------------------------------
272
273    b->SetInsertPoint(incrementItemCounts);
274    // TODO: phi out the item counts and set them once at the end.
275    incrementItemCountsOfCountableRateStreams(b);
[6184]276    writeCopyBackLogic(b);
[6228]277    b->CreateBr(terminationCheck);
278
279    /// -------------------------------------------------------------------------------------
280    /// KERNEL NORMAL TERMINATION CHECK
281    /// -------------------------------------------------------------------------------------
282
283    b->SetInsertPoint(terminationCheck);
284    if (isFinal) {
[6184]285        if (mAlreadyProgressedPhi) {
[6228]286            mAlreadyProgressedPhi->addIncoming(b->getTrue(), terminationCheck);
[6184]287        }
[6228]288        b->CreateUnlikelyCondBr(isFinal, terminated, mKernelLoopEntry);
[6184]289    } else { // just exit the loop
290        if (mHasProgressedPhi) {
[6228]291            mHasProgressedPhi->addIncoming(b->getTrue(), terminationCheck);
[6184]292        }
[6228]293        mTerminatedPhi->addIncoming(b->getFalse(), terminationCheck);
294        b->CreateBr(mKernelLoopExit);
[6184]295    }
296
297    /// -------------------------------------------------------------------------------------
298    /// KERNEL TERMINATED
299    /// -------------------------------------------------------------------------------------
300
[6228]301    b->SetInsertPoint(terminated);
[6184]302    zeroFillPartiallyWrittenOutputStreams(b);
[6233]303    setTerminated(b, b->getTrue());
[6228]304    BasicBlock * const kernelTerminatedEnd = b->GetInsertBlock();
305    mTerminatedPhi->addIncoming(b->getTrue(), kernelTerminatedEnd);
[6184]306    if (mHasProgressedPhi) {
[6228]307        mHasProgressedPhi->addIncoming(b->getTrue(), kernelTerminatedEnd);
[6184]308    }
309    b->CreateBr(mKernelLoopExit);
310
311    /// -------------------------------------------------------------------------------------
312    /// KERNEL LOOP EXIT (CONTINUED)
313    /// -------------------------------------------------------------------------------------
314
315    b->SetInsertPoint(mKernelLoopExit);
[6228]316    computeFullyProcessedItemCounts(b);
[6184]317    computeMinimumConsumedItemCounts(b);
[6228]318    computeMinimumPopCountReferenceCounts(b);
[6184]319    writeCopyForwardLogic(b);
[6228]320    writePopCountComputationLogic(b);
[6184]321    b->CreateBr(mKernelLoopExitPhiCatch);
322    b->SetInsertPoint(mKernelLoopExitPhiCatch);
323    b->CreateBr(mKernelExit);
324
325    /// -------------------------------------------------------------------------------------
326    /// KERNEL EXIT (CONTINUED)
327    /// -------------------------------------------------------------------------------------
328
329    b->SetInsertPoint(mKernelExit);
330    writeFinalConsumedItemCounts(b);
[6228]331    updatePopCountReferenceCounts(b);
[6184]332
333    // TODO: logically we should only need to read produced item counts in the loop exit; however, that
334    // would mean that we'd first need to load the initial produced item counts prior to the loop entry
335    // to have access to them here and then PHI them out within the kernel loop
336
[6228]337    readFinalProducedItemCounts(b);
[6184]338    updateOptionalCycleCounter(b);
339
340    assert (mKernel == mPipeline[mKernelIndex] && b->getKernel() == mKernel);
341}
342
// Synchronization actions for executing a kernel for a particular logical segment.

// Before the segment is processed, CreateAtomicLoadAcquire must be used to load
// the segment number of the kernel state to ensure that the previous segment is
// complete (by checking that the acquired segment number is equal to the desired
// segment number).

// After all segment processing actions for the kernel are complete, and any necessary
// data has been extracted from the kernel for further pipeline processing, the
// segment number must be incremented and stored using CreateAtomicStoreRelease.
353
[6184]354/** ------------------------------------------------------------------------------------------------------------- *
[6237]355 * @brief synchronize
[6184]356 ** ------------------------------------------------------------------------------------------------------------- */
357void PipelineCompiler::synchronize(BuilderRef b) {
358
[6237]359    const auto prefix = makeKernelName(mKernelIndex);
360    b->setKernel(mPipelineKernel);
361    BasicBlock * const kernelWait = b->CreateBasicBlock(prefix + "Wait", mPipelineEnd);
[6184]362    b->CreateBr(kernelWait);
363
364    b->SetInsertPoint(kernelWait);
[6237]365    const auto serialize = codegen::DebugOptionIsSet(codegen::SerializeThreads);
366    const unsigned waitingOnIdx = serialize ? mPipeline.size() - 1 : mKernelIndex;
367    const auto waitingOn = makeKernelName(waitingOnIdx);
368    Value * const waitingOnPtr = b->getScalarFieldPtr(waitingOn + LOGICAL_SEGMENT_NO_SCALAR);
369    Value * const processedSegmentCount = b->CreateAtomicLoadAcquire(waitingOnPtr);
[6184]370    assert (processedSegmentCount->getType() == mSegNo->getType());
371    Value * const ready = b->CreateICmpEQ(mSegNo, processedSegmentCount);
372
[6237]373    BasicBlock * const kernelCheck = b->CreateBasicBlock(prefix + "Check", mPipelineEnd);
[6184]374    b->CreateCondBr(ready, kernelCheck, kernelWait);
375
376    b->SetInsertPoint(kernelCheck);
377    b->setKernel(mKernel);
378}
379
[6237]380/** ------------------------------------------------------------------------------------------------------------- *
381 * @brief releaseCurrentSegment
382 ** ------------------------------------------------------------------------------------------------------------- */
383inline void PipelineCompiler::releaseCurrentSegment(BuilderRef b) {
384    b->setKernel(mPipelineKernel);
385    Value * const nextSegNo = b->CreateAdd(mSegNo, b->getSize(1));
386    const auto prefix = makeKernelName(mKernelIndex);
387    Value * const waitingOnPtr = b->getScalarFieldPtr(prefix + LOGICAL_SEGMENT_NO_SCALAR);
388    b->CreateAtomicStoreRelease(nextSegNo, waitingOnPtr);
389}
[6184]390
391/** ------------------------------------------------------------------------------------------------------------- *
392 * @brief next
393 ** ------------------------------------------------------------------------------------------------------------- */
394void PipelineCompiler::end(BuilderRef b, const unsigned step) {
[6233]395    b->setKernel(mPipelineKernel);
[6184]396    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
397        ConstantInt * const ZERO = b->getSize(0);
398        ConstantInt * const ONE = b->getSize(1);
399        ConstantInt * const TWO = b->getSize(2);
400        Value * const plusOne = b->CreateAdd(mDeadLockCounter, ONE);
401        Value * const newCount = b->CreateSelect(mPipelineProgress, ZERO, plusOne);
[6228]402        b->CreateAssert(b->CreateICmpNE(newCount, TWO),
403                        "Dead lock detected: pipeline could not progress after two iterations");
[6184]404        mDeadLockCounter->addIncoming(newCount, b->GetInsertBlock());
405    }
[6228]406    // check whether every sink has terminated
407    Value * allTerminated = b->getTrue();
408    const auto pipelineOutputVertex = mPipeline.size();
409    for (const auto e : make_iterator_range(in_edges(pipelineOutputVertex, mTerminationGraph))) {
[6184]410        const auto u = source(e, mTerminationGraph);
411        assert (mTerminationGraph[u]);
[6228]412        allTerminated = b->CreateAnd(allTerminated, mTerminationGraph[u]);
[6184]413    }
[6228]414    // or if any output stream of this pipeline cannot support a full stride
415    Value * notEnoughSpace = b->getFalse();
416    for (const auto e : make_iterator_range(in_edges(pipelineOutputVertex, mBufferGraph))) {
417        // TODO: not a very elegant way here; revise
418        const auto bufferVertex = source(e, mBufferGraph);
[6233]419        setActiveKernel(b, parent(bufferVertex, mBufferGraph));
[6228]420        resetMemoizedFields();
421        const auto outputPort = mBufferGraph[e].Port;
422        Value * const writable = getWritableOutputItems(b, outputPort);
423        // NOTE: this method doesn't check a popcount's ref stream to determine how many
424        // items we actually require. Instead it just calculates them as bounded rates.
425        // To support a precise bound, we'd need to produce more ref items than the kernel
426        // that writes to this output actually consumes. Since this effectively adds a
427        // delay equivalent to a LookAhead of a full stride, this doesn't seem useful.
428        Value * const strideLength = getMaximumStrideLength(b, Port::Output, outputPort);
429        notEnoughSpace = b->CreateOr(b->CreateICmpULT(writable, strideLength), notEnoughSpace);
430    }
[6233]431    b->setKernel(mPipelineKernel);
[6228]432    Value * const done = b->CreateOr(allTerminated, notEnoughSpace);
433    #ifdef PRINT_DEBUG_MESSAGES
434    Constant * const ONES = Constant::getAllOnesValue(mSegNo->getType());
435    b->CallPrintInt("+++ pipeline end +++", b->CreateSelect(done, ONES, mSegNo));
436    #endif
437
[6184]438    Value * const nextSegNo = b->CreateAdd(mSegNo, b->getSize(step));
439    mSegNo->addIncoming(nextSegNo, b->GetInsertBlock());
440    b->CreateUnlikelyCondBr(done, mPipelineEnd, mPipelineLoop);
441
442    b->SetInsertPoint(mPipelineEnd);
443    mSegNo = nullptr;
444}
445
446/** ------------------------------------------------------------------------------------------------------------- *
447 * @brief generateFinalizeMethod
448 ** ------------------------------------------------------------------------------------------------------------- */
449void PipelineCompiler::generateFinalizeMethod(BuilderRef b) {
450    printOptionalCycleCounter(b);
451    const auto numOfKernels = mPipeline.size();
452    mOutputScalars.resize(numOfKernels);
453    for (unsigned i = 0; i < numOfKernels; ++i) {
454        setActiveKernel(b, i);
455        loadBufferHandles(b);
456        mOutputScalars[i] = b->CreateCall(getFinalizeFunction(b), mKernel->getHandle());
457    }
458    releaseBuffers(b);
459}
460
461/** ------------------------------------------------------------------------------------------------------------- *
462 * @brief writeOutputScalars
463 ** ------------------------------------------------------------------------------------------------------------- */
464void PipelineCompiler::writeOutputScalars(BuilderRef b, const unsigned u, std::vector<Value *> & args) {
465    args.clear();
466    const auto n = in_degree(u, mScalarDependencyGraph);
467    args.resize(n, nullptr);
468    const auto numOfKernels = mPipeline.size();
469    for (const auto e : make_iterator_range(in_edges(u, mScalarDependencyGraph))) {
470        const auto bufferVertex = source(e, mScalarDependencyGraph);
471        if (LLVM_LIKELY(mScalarDependencyGraph[bufferVertex] == nullptr)) {
472            const auto producer = in_edge(source(e, mScalarDependencyGraph), mScalarDependencyGraph);
473            const auto i = source(producer, mScalarDependencyGraph);
474            const auto j = mScalarDependencyGraph[producer];
475            Value * val = nullptr;
476            if (i == numOfKernels) {
477                const Binding & input = mPipelineKernel->getInputScalarBinding(j);
478                val = b->getScalarField(input.getName());
479            } else { // output scalar of some kernel
480                Value * const outputScalars = mOutputScalars[i]; assert (outputScalars);
481                if (outputScalars->getType()->isAggregateType()) {
482                    val = b->CreateExtractValue(outputScalars, {j});
483                } else { assert (j == 0 && "scalar type is not an aggregate");
484                    val = outputScalars;
485                }
486            }
487            mScalarDependencyGraph[bufferVertex] = val;
488        }
489        const auto k = mScalarDependencyGraph[e];
490        assert (args[k] == nullptr);
491        args[k] = mScalarDependencyGraph[bufferVertex];
492    }
493}
494
495/** ------------------------------------------------------------------------------------------------------------- *
496 * @brief getFinalOutputScalars
497 ** ------------------------------------------------------------------------------------------------------------- */
498std::vector<Value *> PipelineCompiler::getFinalOutputScalars(BuilderRef b) {
499    const auto numOfKernels = mPipeline.size();
500    const auto & calls = mPipelineKernel->getCallBindings();
501    const auto numOfCalls = calls.size();
502    std::vector<Value *> args;
503    b->setKernel(mPipelineKernel);
504    for (unsigned k = 0; k < numOfCalls; ++k) {
505        writeOutputScalars(b, numOfKernels + k + 1, args);
506        Function * const f = cast<Function>(calls[k].Callee);
507        auto i = f->arg_begin();
508        for (auto j = args.begin(); j != args.end(); ++i, ++j) {
509            assert (i != f->arg_end());
510            *j = b->CreateZExtOrTrunc(*j, i->getType());
511        }
512        assert (i == f->arg_end());
513        b->CreateCall(f, args);
514    }
515    writeOutputScalars(b, numOfKernels, args);
516    return args;
517}
518
519/** ------------------------------------------------------------------------------------------------------------- *
520 * @brief initializeKernelCallPhis
521 ** ------------------------------------------------------------------------------------------------------------- */
522inline void PipelineCompiler::initializeKernelCallPhis(BuilderRef b) {
523    const auto numOfInputs = mKernel->getNumOfStreamInputs();
524    Type * const sizeTy = b->getSizeTy();
525    for (unsigned i = 0; i < numOfInputs; ++i) {
526        const Binding & input = mKernel->getInputStreamSetBinding(i);
[6228]527        const auto prefix = makeBufferName(mKernelIndex, input);
528        mLinearInputItemsPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_linearlyAccessible");
[6184]529    }
530    const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
531    for (unsigned i = 0; i < numOfOutputs; ++i) {
[6228]532        if (LLVM_LIKELY(getOutputBufferType(i) != BufferType::Managed)) {
533            const Binding & output = mKernel->getOutputStreamSetBinding(i);
534            const auto prefix = makeBufferName(mKernelIndex, output);
535            mLinearOutputItemsPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_linearlyWritable");
[6184]536        }
537    }
538}
539
540/** ------------------------------------------------------------------------------------------------------------- *
541 * @brief initializeKernelExitPhis
542 ** ------------------------------------------------------------------------------------------------------------- */
543inline void PipelineCompiler::initializeKernelExitPhis(BuilderRef b) {
544    const auto kernelName = makeKernelName(mKernelIndex);
[6228]545    mTerminatedFlag = b->CreatePHI(b->getInt1Ty(), 2, kernelName + "_terminated");
546    mTerminatedFlag->addIncoming(b->getTrue(), mKernelEntry);
547    mTerminatedFlag->addIncoming(mTerminatedPhi, mKernelLoopExitPhiCatch);
548    mTerminationGraph[mKernelIndex] = mTerminatedFlag;
[6184]549    if (mPipelineProgress) {
550        PHINode * const pipelineProgress = b->CreatePHI(b->getInt1Ty(), 2, "pipelineProgress");
551        pipelineProgress->addIncoming(mPipelineProgress, mKernelEntry);
552        pipelineProgress->addIncoming(mHasProgressedPhi, mKernelLoopExitPhiCatch);
553        mPipelineProgress = pipelineProgress;
554    }
555    createConsumedPhiNodes(b);
[6228]556    createPopCountReferenceCounts(b);
[6184]557}
558
559/** ------------------------------------------------------------------------------------------------------------- *
[6233]560 * @brief getInitialTerminationSignal
[6184]561 ** ------------------------------------------------------------------------------------------------------------- */
[6233]562inline Value * PipelineCompiler::getInitialTerminationSignal(BuilderRef b) const {
563    b->setKernel(mPipelineKernel);
564    const auto prefix = makeKernelName(mKernelIndex);
565    Value * const terminated = b->getScalarField(prefix + TERMINATION_SIGNAL);
566    b->setKernel(mKernel);
567    return terminated;
[6184]568}
569
570/** ------------------------------------------------------------------------------------------------------------- *
571 * @brief setTerminated
572 ** ------------------------------------------------------------------------------------------------------------- */
[6233]573inline void PipelineCompiler::setTerminated(BuilderRef b, Value * const value) {
[6228]574    const auto prefix = makeKernelName(mKernelIndex);
[6233]575    b->setKernel(mPipelineKernel);
576    b->setScalarField(prefix + TERMINATION_SIGNAL, value);
[6228]577    #ifdef PRINT_DEBUG_MESSAGES
[6233]578    b->CallPrintInt("*** " + prefix + "_terminated ***", value);
[6228]579    #endif
[6233]580    b->setKernel(mKernel);
[6184]581}
582
583}
// Note: See TracBrowser for help on using the repository browser.