source: icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

Last change on this file was 6296, checked in by cameron, 4 months ago

Merge branch 'master' of https://cs-git-research.cs.surrey.sfu.ca/cameron/parabix-devel

File size: 26.7 KB
Line 
1#include "pipeline_compiler.hpp"
2
3// TODO: if we have multiple copies of the same type of kernel executing sequentially, we could avoid
4// generating an "execution call" for each and instead pass in different handles/item counts. This
5// could improve I-Cache utilization.
6
7namespace kernel {
8
9/** ------------------------------------------------------------------------------------------------------------- *
10 * @brief start
11 ** ------------------------------------------------------------------------------------------------------------- */
12void PipelineCompiler::start(BuilderRef b, Value * const initialSegNo) {
13
14    // Create the basic blocks for the loop.
15    BasicBlock * const entryBlock = b->GetInsertBlock();
16    mPipelineLoop = b->CreateBasicBlock("pipelineLoop");
17    mPipelineEnd = b->CreateBasicBlock("pipelineEnd");
18
19    mKernel = nullptr;
20    mKernelIndex = 0;
21    b->CreateBr(mPipelineLoop);
22
23    b->SetInsertPoint(mPipelineLoop);
24    IntegerType * const sizeTy = b->getSizeTy();
25    ConstantInt * const ZERO = b->getSize(0);
26
27    mSegNo = b->CreatePHI(sizeTy, 2, "segNo");
28    mSegNo->addIncoming(initialSegNo, entryBlock);
29    mProgressCounter = b->CreatePHI(sizeTy, 2, "progressCounter");
30    mProgressCounter->addIncoming(ZERO, entryBlock);
31    mPipelineProgress = b->getFalse();
32    mHalted = b->getFalse();
33    #ifdef PRINT_DEBUG_MESSAGES
34    b->CallPrintInt("+++ pipeline start +++", mSegNo);
35    #endif
36    startOptionalCycleCounter(b);
37}
38
39/** ------------------------------------------------------------------------------------------------------------- *
40 * @brief executeKernel
41 ** ------------------------------------------------------------------------------------------------------------- */
42void PipelineCompiler::executeKernel(BuilderRef b) {
43
44    resetMemoizedFields();
45    mPortOrdering = lexicalOrderingOfStreamIO();
46
47    const auto prefix = makeKernelName(mKernelIndex);
48    mKernelLoopEntry = b->CreateBasicBlock(prefix + "_loopEntry", mPipelineEnd);
49    mKernelLoopCall = b->CreateBasicBlock(prefix + "_executeKernel", mPipelineEnd);
50    mKernelTerminationCheck = b->CreateBasicBlock(prefix + "_normalTerminationCheck", mPipelineEnd);
51    mKernelTerminated = b->CreateBasicBlock(prefix + "_terminated", mPipelineEnd);
52    mKernelLoopExit = b->CreateBasicBlock(prefix + "_loopExit", mPipelineEnd);
53    mKernelExit = b->CreateBasicBlock(prefix + "_kernelExit", mPipelineEnd);
54    // The phi catch simplifies compilation logic by "forward declaring" the loop exit point.
55    // Subsequent optimization phases will collapse it into the correct exit block.
56    mKernelLoopExitPhiCatch = b->CreateBasicBlock(prefix + "_kernelExitPhiCatch", mPipelineEnd);
57
58    /// -------------------------------------------------------------------------------------
59    /// KERNEL ENTRY
60    /// -------------------------------------------------------------------------------------
61
62    loadBufferHandles(b);
63    readInitialItemCounts(b);
64    mKernelEntry = b->GetInsertBlock();
65    b->CreateUnlikelyCondBr(initiallyTerminated(b), mKernelExit, mKernelLoopEntry);
66
67    // Set up some PHI nodes early to simplify accumulating their incoming values.
68    initializeKernelLoopEntryPhis(b);
69    initializeKernelCallPhis(b);
70    initializeKernelTerminatedPhis(b);
71    initializeKernelLoopExitPhis(b);
72    initializeKernelExitPhis(b);
73
74    /// -------------------------------------------------------------------------------------
75    /// KERNEL LOOP ENTRY
76    /// -------------------------------------------------------------------------------------
77
78    b->SetInsertPoint(mKernelLoopEntry);
79    readConsumedItemCounts(b);
80    checkForSufficientInputDataAndOutputSpace(b);
81    determineNumOfLinearStrides(b);
82
83    // TODO: it would be better to try and statically prove whether a kernel will only ever
84    // need a single "run" per segment rather than allowing only source kernels to have this
85    // optimization.
86
87    Value * isFinal = nullptr;
88
89    if (mNumOfLinearStrides) {
90
91        BasicBlock * const enteringNonFinalSegment = b->CreateBasicBlock(prefix + "_nonFinalSegment", mKernelLoopCall);
92        BasicBlock * const enteringFinalStride = b->CreateBasicBlock(prefix + "_finalStride", mKernelLoopCall);
93
94        isFinal = b->CreateICmpEQ(mNumOfLinearStrides, b->getSize(0));
95
96        b->CreateUnlikelyCondBr(isFinal, enteringFinalStride, enteringNonFinalSegment);
97
98        /// -------------------------------------------------------------------------------------
99        /// KERNEL ENTERING FINAL STRIDE
100        /// -------------------------------------------------------------------------------------
101
102        b->SetInsertPoint(enteringFinalStride);
103        calculateFinalItemCounts(b);
104        b->CreateBr(mKernelLoopCall);
105
106        /// -------------------------------------------------------------------------------------
107        /// KERNEL ENTERING NON-FINAL SEGMENT
108        /// -------------------------------------------------------------------------------------
109
110        b->SetInsertPoint(enteringNonFinalSegment);
111        calculateNonFinalItemCounts(b);
112        b->CreateBr(mKernelLoopCall);
113
114    } else {
115
116        mNumOfLinearStrides = b->getSize(1);
117        calculateNonFinalItemCounts(b);
118        b->CreateBr(mKernelLoopCall);
119
120    }
121
122    /// -------------------------------------------------------------------------------------
123    /// KERNEL CALL
124    /// -------------------------------------------------------------------------------------
125
126    b->SetInsertPoint(mKernelLoopCall);
127    writeKernelCall(b);
128    writeCopyBackLogic(b);
129
130    BasicBlock * const abnormalTermination =
131            b->CreateBasicBlock(prefix + "_abnormalTermination", mKernelTerminationCheck);
132
133    // If the kernel explicitly terminates, it must set its processed/produced item counts.
134    // Otherwise, the pipeline will update any countable rates, even upon termination.
135    b->CreateUnlikelyCondBr(mTerminatedExplicitly, abnormalTermination, mKernelTerminationCheck);
136
137    /// -------------------------------------------------------------------------------------
138    /// KERNEL NORMAL TERMINATION CHECK
139    /// -------------------------------------------------------------------------------------
140
141    b->SetInsertPoint(mKernelTerminationCheck);
142    normalTerminationCheck(b, isFinal);
143
144    /// -------------------------------------------------------------------------------------
145    /// KERNEL ABNORMAL TERMINATION
146    /// -------------------------------------------------------------------------------------
147
148    b->SetInsertPoint(abnormalTermination);
149    loadItemCountsOfCountableRateStreams(b);
150    b->CreateBr(mKernelTerminated);
151
152    /// -------------------------------------------------------------------------------------
153    /// KERNEL TERMINATED
154    /// -------------------------------------------------------------------------------------
155
156    b->SetInsertPoint(mKernelTerminated);
157    zeroFillPartiallyWrittenOutputStreams(b);
158    setTerminated(b);
159    updatePhisAfterTermination(b);
160    b->CreateBr(mKernelLoopExit);
161
162    /// -------------------------------------------------------------------------------------
163    /// KERNEL LOOP EXIT
164    /// -------------------------------------------------------------------------------------
165
166    b->SetInsertPoint(mKernelLoopExit);
167    updateTerminationSignal(mTerminatedPhi);
168    writeUpdatedItemCounts(b);
169    computeFullyProcessedItemCounts(b);
170    computeMinimumConsumedItemCounts(b);
171    computeMinimumPopCountReferenceCounts(b);
172    writeCopyForwardLogic(b);
173    writePopCountComputationLogic(b);
174    computeFullyProducedItemCounts(b);
175    mKernelLoopExitPhiCatch->moveAfter(b->GetInsertBlock());
176    b->CreateBr(mKernelLoopExitPhiCatch);
177    b->SetInsertPoint(mKernelLoopExitPhiCatch);
178    b->CreateBr(mKernelExit);
179
180    /// -------------------------------------------------------------------------------------
181    /// KERNEL EXIT (CONTINUED)
182    /// -------------------------------------------------------------------------------------
183
184    b->SetInsertPoint(mKernelExit);
185    mKernelExit->moveAfter(mKernelLoopExitPhiCatch);
186    updateTerminationSignal(mTerminatedAtExitPhi);
187    writeFinalConsumedItemCounts(b);
188    updatePopCountReferenceCounts(b);
189    readFinalProducedItemCounts(b);
190    updateOptionalCycleCounter(b);
191    mHalted = mHaltedPhi;
192    #ifdef PRINT_DEBUG_MESSAGES
193    b->CallPrintInt("--- " + prefix + ".halted ---", mHalted);
194    #endif
195    assert (mKernel == mPipeline[mKernelIndex] && b->getKernel() == mKernel);
196}
197
198/** ------------------------------------------------------------------------------------------------------------- *
199 * @brief isParamAddressable
200 ** ------------------------------------------------------------------------------------------------------------- */
201inline bool isAddressable(const Binding & binding) {
202    if (binding.isDeferred()) {
203        return true;
204    }
205    const ProcessingRate & rate = binding.getRate();
206    return (rate.isBounded() || rate.isUnknown());
207}
208
209/** ------------------------------------------------------------------------------------------------------------- *
210 * @brief end
211 ** ------------------------------------------------------------------------------------------------------------- */
212void PipelineCompiler::end(BuilderRef b, const unsigned step) {
213
214    // A pipeline will end for one or two reasons:
215
216    // 1) Process has *halted* due to insufficient pipeline I/O.
217
218    // 2) All pipeline sinks have terminated (i.e., any kernel that writes
219    // to a pipeline output, is marked as having a side-effect, or produces
220    // an input for some call in which no dependent kernels is a pipeline
221    // sink).
222
223    b->setKernel(mPipelineKernel);
224
225    ConstantInt * const ZERO = b->getSize(0);
226    ConstantInt * const ONE = b->getSize(1);
227    ConstantInt * const TWO = b->getSize(2);
228    Value * const plusOne = b->CreateAdd(mProgressCounter, ONE);
229    Value * const newProgressCounter = b->CreateSelect(mPipelineProgress, ZERO, plusOne);
230    Value * const noProgress = b->CreateICmpEQ(newProgressCounter, TWO);
231
232    Value * const terminated = pipelineTerminated(b);
233    Value * const done = b->CreateOr(mHalted, terminated);
234
235    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
236        b->CreateAssertZero(noProgress,
237            "Dead lock detected: pipeline could not progress after two iterations");
238    }
239
240    #ifdef PRINT_DEBUG_MESSAGES
241    b->CallPrintInt("+++ pipeline end +++", mSegNo);
242    #endif
243
244    Value * const nextSegNo = b->CreateAdd(mSegNo, b->getSize(step));
245    BasicBlock * const exitBlock = b->GetInsertBlock();
246    mSegNo->addIncoming(nextSegNo, exitBlock);
247    mProgressCounter->addIncoming(newProgressCounter, exitBlock);
248    b->CreateUnlikelyCondBr(done, mPipelineEnd, mPipelineLoop);
249
250    b->SetInsertPoint(mPipelineEnd);
251    mSegNo = nullptr;
252    b->setKernel(mPipelineKernel);
253
254    writePipelineIOItemCounts(b);
255
256    if (mPipelineTerminated) {
257        b->CreateStore(terminated, mPipelineTerminated);
258    }
259}
260
261/** ------------------------------------------------------------------------------------------------------------- *
262 * @brief pipelineTerminated
263 ** ------------------------------------------------------------------------------------------------------------- */
264inline Value * PipelineCompiler::pipelineTerminated(BuilderRef b) const {
265    Value * terminated = b->getTrue();
266    // check whether every sink has terminated
267    for (const auto e : make_iterator_range(in_edges(mPipelineOutput, mTerminationGraph))) {
268        const auto kernel = source(e, mTerminationGraph);
269        terminated = b->CreateAnd(terminated, hasKernelTerminated(b, kernel));
270    }
271    return terminated;
272}
273
274
275/** ------------------------------------------------------------------------------------------------------------- *
276 * @brief readPipelineIOItemCounts
277 ** ------------------------------------------------------------------------------------------------------------- */
278void PipelineCompiler::readPipelineIOItemCounts(BuilderRef b) {
279
280    // TODO: this needs to be considered more: if we have multiple consumers of a pipeline input and
281    // they process the input data at differing rates, how do we ensure that we always resume processing
282    // at the correct position? We can store the actual item counts / delta of the consumed count
283    // internally but this would be problematic for optimization branches as we may have processed data
284    // using the alternate path and any internally stored counts/deltas are irrelevant.
285
286    // Would a simple "reset" be enough?
287
288
289    mTotalItems.resize(num_vertices(mBufferGraph) - mPipelineOutput, nullptr);
290
291    for (const auto e : make_iterator_range(out_edges(mPipelineInput, mBufferGraph))) {
292
293
294
295        const auto buffer = target(e, mBufferGraph);
296        const auto inputPort = mBufferGraph[e].inputPort();
297        Value * const available = mPipelineKernel->getAvailableInputItems(inputPort);
298        mTotalItems[getBufferIndex(buffer)] = available;
299        mConsumerGraph[buffer].Consumed = available;
300
301        Value * const inPtr = mPipelineKernel->getProcessedInputItemsPtr(inputPort);
302        Value * const processed = b->CreateLoad(inPtr);
303
304        for (const auto e : make_iterator_range(out_edges(buffer, mBufferGraph))) {
305            const auto inputPort = mBufferGraph[e].inputPort();
306            const auto kernelIndex = target(e, mBufferGraph);
307            Kernel * const kernel = mPipeline[kernelIndex];
308            const Binding & input = kernel->getInputStreamSetBinding(inputPort);
309            const auto prefix = makeBufferName(kernelIndex, input);
310            Value * const ptr = b->getScalarFieldPtr(prefix + ITEM_COUNT_SUFFIX);
311            b->CreateStore(processed, ptr);
312        }
313    }
314
315    for (const auto e : make_iterator_range(in_edges(mPipelineOutput, mBufferGraph))) {
316        const auto buffer = source(e, mBufferGraph);
317        const auto outputPort = mBufferGraph[e].outputPort();
318
319        Value * outPtr = mPipelineKernel->getProducedOutputItemsPtr(outputPort);
320        Value * const produced = b->CreateLoad(outPtr);
321
322        for (const auto e : make_iterator_range(in_edges(buffer, mBufferGraph))) {
323            const auto inputPort = mBufferGraph[e].outputPort();
324            const auto kernelIndex = source(e, mBufferGraph);
325            Kernel * const kernel = mPipeline[kernelIndex];
326            const Binding & output = kernel->getOutputStreamSetBinding(inputPort);
327            const auto prefix = makeBufferName(kernelIndex, output);
328            Value * const ptr = b->getScalarFieldPtr(prefix + ITEM_COUNT_SUFFIX);
329            b->CreateStore(produced, ptr);
330        }
331    }
332
333}
334
335/** ------------------------------------------------------------------------------------------------------------- *
336 * @brief writePipelineIOItemCounts
337 ** ------------------------------------------------------------------------------------------------------------- */
338void PipelineCompiler::writePipelineIOItemCounts(BuilderRef b) {
339
340    for (const auto e : make_iterator_range(out_edges(mPipelineInput, mBufferGraph))) {
341        const auto inputPort = mBufferGraph[e].inputPort();
342        const Binding & input = mPipelineKernel->getInputStreamSetBinding(inputPort);
343        Value * const ptr = mPipelineKernel->getProcessedInputItemsPtr(inputPort);
344        const auto prefix = makeBufferName(mPipelineInput, input);
345        Value * const consumed = b->getScalarField(prefix + CONSUMED_ITEM_COUNT_SUFFIX);
346        b->CreateStore(consumed, ptr);
347    }
348
349    for (const auto e : make_iterator_range(in_edges(mPipelineOutput, mBufferGraph))) {
350        const auto externalPort = mBufferGraph[e].outputPort();
351        const auto buffer = source(e, mBufferGraph);
352        const auto pe = in_edge(buffer, mBufferGraph);
353        const auto internalPort = mBufferGraph[pe].outputPort();
354        const auto producer = source(pe, mBufferGraph);
355        const Kernel * const kernel = mPipeline[producer];
356        const Binding & output = kernel->getOutputStreamSetBinding(internalPort);
357        Value * const ptr = mPipelineKernel->getProducedOutputItemsPtr(externalPort);
358        const auto prefix = makeBufferName(producer, output);
359        Value * const produced = b->getScalarField(prefix + ITEM_COUNT_SUFFIX);
360        b->CreateStore(produced, ptr);
361    }
362
363}
364
365/** ------------------------------------------------------------------------------------------------------------- *
366 * @brief initializeKernelLoopEntryPhis
367 ** ------------------------------------------------------------------------------------------------------------- */
368inline void PipelineCompiler::initializeKernelLoopEntryPhis(BuilderRef b) {
369    b->SetInsertPoint(mKernelLoopEntry);
370    const auto numOfInputs = mKernel->getNumOfStreamInputs();
371    Type * const sizeTy = b->getSizeTy();
372    for (unsigned i = 0; i < numOfInputs; ++i) {
373        const Binding & input = mKernel->getInputStreamSetBinding(i);
374        const auto prefix = makeBufferName(mKernelIndex, input);
375        mAlreadyProcessedPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_alreadyProcessed");
376        mAlreadyProcessedPhi[i]->addIncoming(mInitiallyProcessedItemCount[i], mKernelEntry);
377        if (mInitiallyProcessedDeferredItemCount[i]) {
378            mAlreadyProcessedDeferredPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_alreadyProcessedDeferred");
379            mAlreadyProcessedDeferredPhi[i]->addIncoming(mInitiallyProcessedDeferredItemCount[i], mKernelEntry);
380        }
381    }
382    const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
383    for (unsigned i = 0; i < numOfOutputs; ++i) {
384        const Binding & output = mKernel->getOutputStreamSetBinding(i);
385        const auto prefix = makeBufferName(mKernelIndex, output);
386        mAlreadyProducedPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_alreadyProduced");
387        mAlreadyProducedPhi[i]->addIncoming(mInitiallyProducedItemCount[i], mKernelEntry);
388    }
389    // Since we may loop and call the kernel again, we want to mark that we've progressed
390    // if we execute any kernel even if we could not complete a full segment.
391    const auto prefix = makeKernelName(mKernelIndex);
392    mAlreadyProgressedPhi = b->CreatePHI(b->getInt1Ty(), 2, prefix + "_madeProgress");
393    mAlreadyProgressedPhi->addIncoming(mPipelineProgress, mKernelEntry);
394}
395
396/** ------------------------------------------------------------------------------------------------------------- *
397 * @brief initializeKernelCallPhis
398 ** ------------------------------------------------------------------------------------------------------------- */
399inline void PipelineCompiler::initializeKernelCallPhis(BuilderRef b) {
400    b->SetInsertPoint(mKernelLoopCall);
401    const auto numOfInputs = mKernel->getNumOfStreamInputs();
402    Type * const sizeTy = b->getSizeTy();
403    for (unsigned i = 0; i < numOfInputs; ++i) {
404        const Binding & input = mKernel->getInputStreamSetBinding(i);
405        const auto prefix = makeBufferName(mKernelIndex, input);
406        mLinearInputItemsPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_linearlyAccessible");
407    }
408    const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
409    for (unsigned i = 0; i < numOfOutputs; ++i) {
410        const Binding & output = mKernel->getOutputStreamSetBinding(i);
411        const auto prefix = makeBufferName(mKernelIndex, output);
412        mLinearOutputItemsPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_linearlyWritable");
413    }
414}
415
416/** ------------------------------------------------------------------------------------------------------------- *
417 * @brief initializeKernelTerminatedPhis
418 ** ------------------------------------------------------------------------------------------------------------- */
419inline void PipelineCompiler::initializeKernelTerminatedPhis(BuilderRef b) {
420    b->SetInsertPoint(mKernelTerminated);
421    const auto numOfInputs = mKernel->getNumOfStreamInputs();
422    Type * const sizeTy = b->getSizeTy();
423    for (unsigned i = 0; i < numOfInputs; ++i) {
424        const Binding & input = mKernel->getInputStreamSetBinding(i);
425        const auto prefix = makeBufferName(mKernelIndex, input);
426        mFinalProcessedPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_finalProcessed");
427    }
428    const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
429    for (unsigned i = 0; i < numOfOutputs; ++i) {
430        const Binding & output = mKernel->getOutputStreamSetBinding(i);
431        const auto prefix = makeBufferName(mKernelIndex, output);
432        mFinalProducedPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_finalProduced");
433    }
434}
435
436/** ------------------------------------------------------------------------------------------------------------- *
437 * @brief initializeKernelLoopExitPhis
438 ** ------------------------------------------------------------------------------------------------------------- */
439inline void PipelineCompiler::initializeKernelLoopExitPhis(BuilderRef b) {
440    b->SetInsertPoint(mKernelLoopExit);
441    const auto prefix = makeKernelName(mKernelIndex);
442    IntegerType * const sizeTy = b->getSizeTy();
443    IntegerType * const boolTy = b->getInt1Ty();
444    mTerminatedPhi = b->CreatePHI(sizeTy, 2, prefix + "_terminated");
445    mHasProgressedPhi = b->CreatePHI(boolTy, 2, prefix + "_anyProgress");
446    mHaltingPhi = b->CreatePHI(boolTy, 2, prefix + "_halting");
447    const auto numOfInputs = mKernel->getNumOfStreamInputs();
448    for (unsigned i = 0; i < numOfInputs; ++i) {
449        const Binding & input = mKernel->getInputStreamSetBinding(i);
450        const auto prefix = makeBufferName(mKernelIndex, input);
451        mUpdatedProcessedPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_updatedProcessed");
452        if (mAlreadyProcessedDeferredPhi[i]) {
453            mUpdatedProcessedDeferredPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_updatedProcessedDeferred");
454        }
455    }
456    const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
457    for (unsigned i = 0; i < numOfOutputs; ++i) {
458        const Binding & output = mKernel->getOutputStreamSetBinding(i);
459        const auto prefix = makeBufferName(mKernelIndex, output);
460        mUpdatedProducedPhi[i] = b->CreatePHI(sizeTy, 2, prefix + "_updatedProduced");
461    }
462}
463
464/** ------------------------------------------------------------------------------------------------------------- *
465 * @brief initializeKernelExitPhis
466 ** ------------------------------------------------------------------------------------------------------------- */
467inline void PipelineCompiler::initializeKernelExitPhis(BuilderRef b) {
468    b->SetInsertPoint(mKernelExit);
469    const auto prefix = makeKernelName(mKernelIndex);
470    IntegerType * const sizeTy = b->getSizeTy();
471    mTerminatedAtExitPhi = b->CreatePHI(sizeTy, 2, prefix + "_terminated");
472    mTerminatedAtExitPhi->addIncoming(mTerminatedInitially, mKernelEntry);
473    mTerminatedAtExitPhi->addIncoming(mTerminatedPhi, mKernelLoopExitPhiCatch);
474
475    IntegerType * const boolTy = b->getInt1Ty();
476
477    mHaltedPhi = b->CreatePHI(boolTy, 2, prefix + "_halted");
478    mHaltedPhi->addIncoming(mHalted, mKernelEntry);
479    mHaltedPhi->addIncoming(mHaltingPhi, mKernelLoopExitPhiCatch);
480
481    PHINode * const pipelineProgress = b->CreatePHI(boolTy, 2, prefix + "_pipelineProgress");
482    pipelineProgress->addIncoming(mPipelineProgress, mKernelEntry);
483    pipelineProgress->addIncoming(mHasProgressedPhi, mKernelLoopExitPhiCatch);
484    mPipelineProgress = pipelineProgress;
485
486    createConsumedPhiNodes(b);
487    const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
488    for (unsigned i = 0; i < numOfOutputs; ++i) {
489        const Binding & output = mKernel->getOutputStreamSetBinding(i);
490        const auto prefix = makeBufferName(mKernelIndex, output);
491        PHINode * const fullyProduced = b->CreatePHI(sizeTy, 2, prefix + "_fullyProduced");
492        fullyProduced->addIncoming(mInitiallyProducedItemCount[i], mKernelEntry);
493        mFullyProducedItemCount[i] = fullyProduced;
494    }
495    createPopCountReferenceCounts(b);
496}
497
498/** ------------------------------------------------------------------------------------------------------------- *
499 * @brief normalTerminationCheck
500 ** ------------------------------------------------------------------------------------------------------------- */
501inline void PipelineCompiler::normalTerminationCheck(BuilderRef b, Value * const isFinal) {
502    BasicBlock * const entryBlock = b->GetInsertBlock();
503    if (isFinal) {
504        const auto numOfInputs = mKernel->getNumOfStreamInputs();
505        for (unsigned i = 0; i < numOfInputs; ++i) {
506            mAlreadyProcessedPhi[i]->addIncoming(mProcessedItemCount[i], entryBlock);
507            if (mAlreadyProcessedDeferredPhi[i]) {
508                mAlreadyProcessedDeferredPhi[i]->addIncoming(mProcessedDeferredItemCount[i], entryBlock);
509            }
510            mFinalProcessedPhi[i]->addIncoming(mProcessedItemCount[i], entryBlock);
511        }
512        const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
513        for (unsigned i = 0; i < numOfOutputs; ++i) {
514            mAlreadyProducedPhi[i]->addIncoming(mProducedItemCount[i], entryBlock);
515            mFinalProducedPhi[i]->addIncoming(mProducedItemCount[i], entryBlock);
516        }
517        if (mAlreadyProgressedPhi) {
518            mAlreadyProgressedPhi->addIncoming(b->getTrue(), entryBlock);
519        }
520        b->CreateUnlikelyCondBr(isFinal, mKernelTerminated, mKernelLoopEntry);
521    } else { // just exit the loop
522        const auto numOfInputs = mKernel->getNumOfStreamInputs();
523        for (unsigned i = 0; i < numOfInputs; ++i) {
524            mUpdatedProcessedPhi[i]->addIncoming(mProcessedItemCount[i], entryBlock);
525            if (mUpdatedProcessedDeferredPhi[i]) {
526                mUpdatedProcessedDeferredPhi[i]->addIncoming(mProcessedDeferredItemCount[i], entryBlock);
527            }
528        }
529        const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
530        for (unsigned i = 0; i < numOfOutputs; ++i) {
531            mUpdatedProducedPhi[i]->addIncoming(mProducedItemCount[i], entryBlock);
532        }
533        mTerminatedPhi->addIncoming(mTerminatedInitially, entryBlock);
534        mHasProgressedPhi->addIncoming(b->getTrue(), entryBlock);
535        mHaltingPhi->addIncoming(mHalted, entryBlock);
536        b->CreateBr(mKernelLoopExit);
537    }
538}
539
540/** ------------------------------------------------------------------------------------------------------------- *
541 * @brief updatePhiCountAfterTermination
542 ** ------------------------------------------------------------------------------------------------------------- */
543inline void PipelineCompiler::updatePhisAfterTermination(BuilderRef b) {
544    BasicBlock * const exitBlock = b->GetInsertBlock();
545    mTerminatedPhi->addIncoming(getTerminationSignal(b, mKernelIndex), exitBlock);
546    mHasProgressedPhi->addIncoming(b->getTrue(), exitBlock);
547    mHaltingPhi->addIncoming(mHalted, exitBlock);
548    const auto numOfInputs = mKernel->getNumOfStreamInputs();
549    for (unsigned i = 0; i < numOfInputs; ++i) {
550        Value * const totalCount = getTotalItemCount(b, i);
551        mUpdatedProcessedPhi[i]->addIncoming(totalCount, exitBlock);
552        if (mUpdatedProcessedDeferredPhi[i]) {
553            mUpdatedProcessedDeferredPhi[i]->addIncoming(totalCount, exitBlock);
554        }
555    }
556    const auto numOfOutputs = mKernel->getNumOfStreamOutputs();
557    for (unsigned i = 0; i < numOfOutputs; ++i) {
558        mUpdatedProducedPhi[i]->addIncoming(mFinalProducedPhi[i], exitBlock);
559    }
560}
561
562}
Note: See TracBrowser for help on using the repository browser.