source: icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp @ 5493

Last change on this file since 5493 was 5493, checked in by cameron, 2 years ago

Restore check-ins from the last several days

File size: 48.0 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "carry_manager.h"
8#include <pablo/carry_data.h>
9#include <pablo/codegenstate.h>
10#include <llvm/IR/BasicBlock.h>
11#include <llvm/IR/DerivedTypes.h>
12#include <pablo/branch.h>
13#include <pablo/pe_advance.h>
14#include <pablo/pe_scanthru.h>
15#include <pablo/pe_matchstar.h>
16#include <pablo/pe_var.h>
17#include <kernels/kernel_builder.h>
18#include <toolchain/toolchain.h>
19#include <array>
20
21using namespace llvm;
22
23namespace pablo {
24
/** ------------------------------------------------------------------------------------------------------------- *
 * @brief ceil_log2
 *
 * Returns ceil(log2(v)), i.e. the smallest k for which (1 << k) >= v.
 *
 * @param v  value to take the logarithm of; must be non-zero
 ** ------------------------------------------------------------------------------------------------------------- */
inline static unsigned ceil_log2(const unsigned v) {
    assert ("log2(0) is undefined!" && v != 0);
    // __builtin_clz has an undefined result for a zero argument, which is what (v - 1) would be
    // for v == 1, so that case is answered directly instead of being passed to the builtin.
    if (v == 1) {
        return 0;
    }
    return (sizeof(unsigned) * CHAR_BIT) - __builtin_clz(v - 1);
}
29
/** ------------------------------------------------------------------------------------------------------------- *
 * @brief floor_log2
 *
 * Returns floor(log2(v)), i.e. the index of the most significant set bit of v.
 *
 * @param v  value to take the logarithm of; must be non-zero
 ** ------------------------------------------------------------------------------------------------------------- */
inline static unsigned floor_log2(const unsigned v) {
    assert ("log2(0) is undefined!" && v != 0);
    // Index of the top bit of an unsigned, minus the number of zero bits above the highest set bit.
    const unsigned highestBit = (sizeof(unsigned) * CHAR_BIT) - 1;
    const unsigned leadingZeros = __builtin_clz(v);
    return highestBit - leadingZeros;
}
34
35inline static unsigned nearest_pow2(const unsigned v) {
36    assert(v > 0 && v < (UINT32_MAX / 2));
37    return (v < 2) ? 1 : (1 << ceil_log2(v));
38}
39
/** ------------------------------------------------------------------------------------------------------------- *
 * @brief ceil_udiv
 *
 * Returns ceil(x / y) for unsigned operands. Works for any non-zero divisor (not only powers of two) and cannot
 * wrap around for large x, because no intermediate value exceeds x.
 *
 * @param x  dividend
 * @param y  divisor; must be non-zero
 ** ------------------------------------------------------------------------------------------------------------- */
inline static unsigned ceil_udiv(const unsigned x, const unsigned y) {
    assert ("division by zero!" && y != 0);
    // Whole quotient, plus one more unit whenever the division leaves a remainder.
    return (x / y) + (((x % y) != 0) ? 1U : 0U);
}
43
44using TypeId = PabloAST::ClassTypeId;
45
46inline static bool isNonAdvanceCarryGeneratingStatement(const Statement * const stmt) {
47    return isa<CarryProducingStatement>(stmt) && !isa<Advance>(stmt);
48}
49
// Shift-amount threshold used by advanceCarryInCarryOut: amounts below this value are handled with a single
// carry pack, while amounts at or above it are routed to longAdvanceCarryInCarryOut.
50#define LONG_ADVANCE_BREAKPOINT 64
51
52/** ------------------------------------------------------------------------------------------------------------- *
53 * @brief initializeCarryData
54 ** ------------------------------------------------------------------------------------------------------------- */
55void CarryManager::initializeCarryData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, PabloKernel * const kernel) {
56
57    // Each scope constructs its own CarryData struct, which will be added to the final "carries" struct
58    // that is added to the Kernel. The scope index will indicate which struct to access.
59
60    // A CarryData struct either contains an array of CarryPackBlocks or an integer indicating the capacity of
61    // the variable length CarryData struct and pointer. A variable length CarryData struct is required whenever
62    // the streams accessed by a loop could vary between iterations. When resizing a CarryData struct for a
63    // particular loop, the current loop struct and all nested structs need to be resized. This accommodates
64    // the fact every pablo While loop must be executed at least once.
65
66    // A nested loop may also contain a variable length CarryData struct
67
68    // To determine whether we require a variable length CarryData struct, we test the escaped variables of
69    // each loop branch to see whether they are used as the index parameter of a nested Extract statement.
70    // Any scope that requires variable length CarryData, requires that all nested branches have a unique
71    // set of carries for that iteration.
72
73    assert (mKernel == nullptr);
74    mCurrentScope = kernel->getEntryBlock();
75    mKernel = kernel;
76
77    mCarryScopes = 0;
78
79    mCarryMetadata.resize(getScopeCount(mCurrentScope));
80
81    Type * const carryStateTy = analyse(iBuilder, mCurrentScope);
82
83    kernel->addScalar(carryStateTy, "carries");
84
85    if (mHasLoop) {
86        kernel->addScalar(iBuilder->getInt32Ty(), "selector");
87    }
88    if (mHasLongAdvance) {
89        kernel->addScalar(iBuilder->getSizeTy(), "CarryBlockIndex");
90    }
91}
92
93bool isDynamicallyAllocatedType(const Type * const ty) {
94    if (isa<StructType>(ty) && ty->getStructNumElements() == 3) {
95        return (ty->getStructElementType(1)->isPointerTy() && ty->getStructElementType(2)->isPointerTy() && ty->getStructElementType(0)->isIntegerTy());
96    }
97    return false;
98}
99
100bool containsDynamicallyAllocatedType(const Type * const ty) {
101    if (isa<StructType>(ty)) {
102        for (unsigned i = 0; i < ty->getStructNumElements(); ++i) {
103            if (isDynamicallyAllocatedType(ty->getStructElementType(i))) {
104                return true;
105            }
106        }
107    }
108    return false;
109}
110
111void freeDynamicallyAllocatedMemory(const std::unique_ptr<kernel::KernelBuilder> & idb, Value * const frame) {
112    StructType * const ty = cast<StructType>(frame->getType()->getPointerElementType());
113    std::array<Value *, 3> indices;
114    indices[0] = idb->getInt32(0);
115    for (unsigned i = 0; i < ty->getStructNumElements(); ++i) {
116        if (isDynamicallyAllocatedType(ty->getStructElementType(i))) {
117            indices[1] = idb->getInt32(i);
118            indices[2] = idb->getInt32(1);
119            Value * const innerFrame = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
120            if (containsDynamicallyAllocatedType(innerFrame->getType())) {
121                indices[2] = indices[0];
122                Value *  const count = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
123                BasicBlock * const entry = idb->GetInsertBlock();
124                BasicBlock * const cond = idb->CreateBasicBlock("freeCarryDataCond");
125                BasicBlock * const body = idb->CreateBasicBlock("freeCarryDataLoop");
126                BasicBlock * const exit = idb->CreateBasicBlock("freeCarryDataExit");
127                idb->CreateBr(cond);
128                idb->SetInsertPoint(cond);
129                PHINode * const index = idb->CreatePHI(count->getType(), 2);
130                index->addIncoming(ConstantInt::getNullValue(count->getType()), entry);
131                Value * test = idb->CreateICmpNE(index, count);
132                idb->CreateCondBr(test, body, exit);
133                idb->SetInsertPoint(body);
134                freeDynamicallyAllocatedMemory(idb, idb->CreateGEP(innerFrame, index));
135                index->addIncoming(idb->CreateAdd(index, ConstantInt::get(count->getType(), 1)), body);
136                idb->CreateBr(cond);
137                idb->SetInsertPoint(exit);
138            }
139            idb->CreateFree(innerFrame);
140            indices[2] = idb->getInt32(2);
141            Value *  const summary = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
142            idb->CreateFree(summary);
143        }
144    }
145}
146
147/** ------------------------------------------------------------------------------------------------------------- *
148 * @brief releaseCarryData
149 ** ------------------------------------------------------------------------------------------------------------- */
150void CarryManager::releaseCarryData(const std::unique_ptr<kernel::KernelBuilder> & idb) {
151    if (mHasNonCarryCollapsingLoops) {
152        freeDynamicallyAllocatedMemory(idb, idb->getScalarFieldPtr("carries"));
153    }
154}
155
156/** ------------------------------------------------------------------------------------------------------------- *
157 * @brief initializeCodeGen
158 ** ------------------------------------------------------------------------------------------------------------- */
159void CarryManager::initializeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
160
161    assert(!mCarryMetadata.empty());
162    mCarryInfo = &mCarryMetadata[0];
163    assert (!mCarryInfo->hasSummary());
164
165    mCurrentFrame = iBuilder->getScalarFieldPtr("carries");
166    mCurrentFrameIndex = 0;
167    mCarryScopes = 0;
168    mCarryScopeIndex.push_back(0);
169
170    assert (mCarryFrameStack.empty());
171
172    assert (mCarrySummaryStack.empty());
173
174    Type * const carryTy = iBuilder->getBitBlockType();
175
176    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
177
178    if (mHasLoop) {       
179        mLoopSelector = iBuilder->getScalarField("selector");
180        mNextLoopSelector = iBuilder->CreateXor(mLoopSelector, ConstantInt::get(mLoopSelector->getType(), 1));
181    }
182
183}
184
185/** ------------------------------------------------------------------------------------------------------------- *
186 * @brief finalizeCodeGen
187 ** ------------------------------------------------------------------------------------------------------------- */
188void CarryManager::finalizeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
189    if (mHasLoop) {
190        iBuilder->setScalarField("selector", mNextLoopSelector);
191    }
192    if (mHasLongAdvance) {
193        Value * idx = iBuilder->getScalarField("CarryBlockIndex");
194        idx = iBuilder->CreateAdd(idx, iBuilder->getSize(1));
195        iBuilder->setScalarField("CarryBlockIndex", idx);
196    }
197    assert (mCarryFrameStack.empty());   
198    assert ("base summary value was deleted!" && mCarrySummaryStack.size() == 1);
199    assert ("base summary value was overwritten with non-zero value!" && isa<Constant>(mCarrySummaryStack[0]) && cast<Constant>(mCarrySummaryStack[0])->isNullValue());
200    mCarrySummaryStack.clear();
201    assert (mCarryScopeIndex.size() == 1);
202    mCarryScopeIndex.clear();
203}
204
205/** ------------------------------------------------------------------------------------------------------------- *
206 * @brief enterLoopScope
207 ** ------------------------------------------------------------------------------------------------------------- */
208void CarryManager::enterLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
209    assert (scope);
210    assert (mHasLoop);
211    ++mLoopDepth;
212    enterScope(iBuilder, scope);
213}
214
215/** ------------------------------------------------------------------------------------------------------------- *
216 * @brief enterLoopBody
 *
 * Sets up the carry bookkeeping at the top of a loop body. When the current scope has a summary, a "summary" phi
 * is created whose incoming value from entryBlock is the summary on top of the stack; the phi replaces that stack
 * entry and is also pushed as a new entry. In non-carry-collapsing mode this additionally creates a phi that
 * counts loop iterations, emits code that allocates or doubles the per-iteration carry array (and its summary
 * buffer) whenever the iteration index equals the stored capacity, and points mCurrentFrame at the array element
 * belonging to the current iteration.
 *
 * @param iBuilder    builder used to emit the IR
 * @param entryBlock  block from which the loop body is first entered; supplies each phi's initial value
217 ** ------------------------------------------------------------------------------------------------------------- */
218void CarryManager::enterLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const entryBlock) {
219    if (mCarryInfo->hasSummary()) {
220        Type * const carryTy = iBuilder->getBitBlockType();
221        PHINode * phiCarryOutSummary = iBuilder->CreatePHI(carryTy, 2, "summary");
222        assert (!mCarrySummaryStack.empty());
223        phiCarryOutSummary->addIncoming(mCarrySummaryStack.back(), entryBlock);
224        // Replace the incoming carry summary with the phi node and add the phi node to the stack  so that we can
225        // properly OR it into the outgoing summary value.
226        // NOTE: this may change the base summary value; when exiting to the base scope, replace this summary with
227        // a null value to prevent subsequent nested scopes from inheriting the summary of this scope.
228        mCarrySummaryStack.back() = phiCarryOutSummary;
229        mCarrySummaryStack.push_back(phiCarryOutSummary);
230    }
231    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
232
233        assert (mCarryInfo->hasSummary());
234
235        Type * const int8PtrTy = iBuilder->getInt8PtrTy();
236        Type * const carryTy = iBuilder->getBitBlockType();
237        PointerType * const carryPtrTy = carryTy->getPointerTo();
238
239        // Check whether we need to resize the carry state
        // "index" counts the iterations of this loop; its back-edge value is added in leaveLoopBody.
240        PHINode * index = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
241        mLoopIndicies.push_back(index);
242        index->addIncoming(iBuilder->getSize(0), entryBlock);
        // In this mode the current frame is accessed as { capacity, array pointer, summary pointer } through
        // struct fields 0, 1 and 2 respectively.
243        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
244        Value * capacity = iBuilder->CreateLoad(capacityPtr, "capacity");
245        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
246        Value * arrayPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
247        Value * array = iBuilder->CreateLoad(arrayPtr, "array");
248        BasicBlock * const entry = iBuilder->GetInsertBlock();
249        BasicBlock * const resizeCarryState = iBuilder->CreateBasicBlock("ResizeCarryState");
250        BasicBlock * const reallocExisting = iBuilder->CreateBasicBlock("ReallocExisting");
251        BasicBlock * const createNew = iBuilder->CreateBasicBlock("CreateNew");
252        BasicBlock * const resumeKernel = iBuilder->CreateBasicBlock("ResumeKernel");
        // Resizing is only needed once the iteration index has reached the capacity.
253        iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(index, capacity), resumeKernel, resizeCarryState);
254
255        // RESIZE CARRY BLOCK
256        iBuilder->SetInsertPoint(resizeCarryState);
        // BlockWidth is the bit block width expressed in bytes.
257        const auto BlockWidth = iBuilder->getBitBlockWidth() / 8;
258        const auto Log2BlockWidth = floor_log2(BlockWidth);
        // Size in bytes of a single element of the carry array.
259        Constant * const carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(array->getType()->getPointerElementType()), iBuilder->getSizeTy(), false);
260        Value * const summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
        // A null array pointer means nothing has been allocated for this loop yet.
261        Value * const hasCarryState = iBuilder->CreateICmpNE(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
262        iBuilder->CreateLikelyCondBr(hasCarryState, reallocExisting, createNew);
263
264        // REALLOCATE EXISTING
        // Doubles the carry array: the old contents are copied over and the newly added half is zeroed.
265        iBuilder->SetInsertPoint(reallocExisting);
266        Value * const capacitySize = iBuilder->CreateMul(capacity, carryStateWidth);
267        Value * const newCapacitySize = iBuilder->CreateShl(capacitySize, 1); // x 2
268        Value * const newArray = iBuilder->CreateCacheAlignedMalloc(newCapacitySize);
269        iBuilder->CreateMemCpy(newArray, array, capacitySize, iBuilder->getCacheAlignment());
270        iBuilder->CreateFree(array);
        // NOTE(review): unlike initialArray in the CREATE NEW path, newArray is stored here (and used in the phi
        // below) without a pointer cast to array's type; confirm that CreateCacheAlignedMalloc's result type
        // already matches.
271        iBuilder->CreateStore(newArray, arrayPtr);
272        Value * const startNewArrayPtr = iBuilder->CreateGEP(iBuilder->CreatePointerCast(newArray, int8PtrTy), capacitySize);
273        iBuilder->CreateMemZero(startNewArrayPtr, capacitySize, BlockWidth);
274        Value * const newCapacity = iBuilder->CreateShl(capacity, 1);
275        iBuilder->CreateStore(newCapacity, capacityPtr);
276        Value * const summary = iBuilder->CreateLoad(summaryPtr, false);
        // Current summary size in bytes: (ceil_log2(capacity) + 1) * 2 * BlockWidth. Doubling the capacity adds
        // one to the log2 term, hence the two additional blocks allocated and zeroed below.
277        Value * const summarySize = iBuilder->CreateShl(iBuilder->CreateAdd(iBuilder->CreateCeilLog2(capacity), ONE), Log2BlockWidth + 1);
278        Constant * const additionalSpace = iBuilder->getSize(2 * BlockWidth);
279        Value * const newSummarySize = iBuilder->CreateAdd(summarySize, additionalSpace);
280        Value * const newSummary = iBuilder->CreateBlockAlignedMalloc(newSummarySize);
281        iBuilder->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
282        iBuilder->CreateFree(summary);
283        iBuilder->CreateStore(iBuilder->CreatePointerCast(newSummary, carryPtrTy), summaryPtr);
284        Value * const startNewSummaryPtr = iBuilder->CreateGEP(iBuilder->CreatePointerCast(newSummary, int8PtrTy), summarySize);
285        iBuilder->CreateMemZero(startNewSummaryPtr, additionalSpace, BlockWidth);
286        iBuilder->CreateBr(resumeKernel);
287
288        // CREATE NEW
        // First allocation: a zeroed array with capacity 1 << 4 and a zeroed summary buffer sized to match.
289        iBuilder->SetInsertPoint(createNew);
290        Constant * const initialLog2Capacity = iBuilder->getInt64(4);
291        Constant * const initialCapacity = ConstantExpr::getShl(ONE, initialLog2Capacity);
292        iBuilder->CreateStore(initialCapacity, capacityPtr);
293        Constant * const initialCapacitySize = ConstantExpr::getMul(initialCapacity, carryStateWidth);
294        Value * initialArray = iBuilder->CreateCacheAlignedMalloc(initialCapacitySize);
295        iBuilder->CreateMemZero(initialArray, initialCapacitySize, BlockWidth);
296        initialArray = iBuilder->CreatePointerCast(initialArray, array->getType());
297        iBuilder->CreateStore(initialArray, arrayPtr);
298        Constant * initialSummarySize = ConstantExpr::getShl(ConstantExpr::getAdd(initialLog2Capacity, iBuilder->getInt64(1)), iBuilder->getInt64(Log2BlockWidth + 1));
299        Value * initialSummary = iBuilder->CreateBlockAlignedMalloc(initialSummarySize);
300        iBuilder->CreateMemZero(initialSummary, initialSummarySize, BlockWidth);
301        initialSummary = iBuilder->CreatePointerCast(initialSummary, carryPtrTy);
302        iBuilder->CreateStore(initialSummary, summaryPtr);
303        iBuilder->CreateBr(resumeKernel);
304
305        // RESUME KERNEL
        // Merge the array pointer arriving from the three possible paths: no resize, new, reallocated.
306        iBuilder->SetInsertPoint(resumeKernel);
307        PHINode * phiArrayPtr = iBuilder->CreatePHI(array->getType(), 3);
308        phiArrayPtr->addIncoming(array, entry);
309        phiArrayPtr->addIncoming(initialArray, createNew);
310        phiArrayPtr->addIncoming(newArray, reallocExisting);
311
312        // NOTE: the 3 here is only to pass the assertion later. It refers to the number of elements in the carry data struct.
313        mCarryFrameStack.emplace_back(mCurrentFrame, 3);
        // From here on the current frame is the array element selected by the iteration index.
314        mCurrentFrame = iBuilder->CreateGEP(phiArrayPtr, index);
315    }
316}
317
318/** ------------------------------------------------------------------------------------------------------------- *
319 * @brief leaveLoopBody
 *
 * Closes the carry bookkeeping of a loop body. In non-carry-collapsing mode this writes the explicit carry-out
 * summary (if the scope has one), restores the frame that enterLoopBody pushed, emits a loop that folds the
 * carry-in and carry-out accumulators into the summary buffer of the loop, adds the back-edge value of the
 * iteration index phi and sets mNextSummaryTest. When the scope has a summary, the body's summary is popped,
 * added as an incoming value of the phi created by enterLoopBody, and the top of the summary stack is set to
 * either that value or, when returning to the base scope, a null value.
 *
 * @param iBuilder  builder used to emit the IR
320 ** ------------------------------------------------------------------------------------------------------------- */
321void CarryManager::leaveLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * /* exitBlock */) {
322
323    Type * const carryTy = iBuilder->getBitBlockType();
324
325    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
326
327        assert (mCarryInfo->hasSummary());
328
        // With an explicit summary the summary occupies the current frame slot; otherwise the previous slot is used.
329        ConstantInt * const summaryIndex = iBuilder->getInt32(mCarryInfo->hasExplicitSummary() ? mCurrentFrameIndex : (mCurrentFrameIndex - 1));
330
331        Value * const carryInAccumulator = readCarryInSummary(iBuilder, summaryIndex);
332        Value * const carryOutAccumulator = mCarrySummaryStack.back();
333
334        if (mCarryInfo->hasExplicitSummary()) {
335            writeCarryOutSummary(iBuilder, carryOutAccumulator, summaryIndex);
336        }
337
        // Return to the frame that was current before enterLoopBody switched to the per-iteration array element.
338        std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
339        mCarryFrameStack.pop_back();
340
341        // In non-carry-collapsing mode, we cannot rely on the fact that performing a single iteration of this
342        // loop will consume all of the incoming carries from the prior block. We need to subtract the carries
343        // consumed by this iteration from our carry summary state. To do so in parallel, we use the half-
344        // subtractor circuit to do it in ceil log2 steps. Similarly, we compute our carry out summary state
345        // (for the subsequent block to subtract) using a half-adder circuit.
346
347        // NOTE: this requires that, for all loop iterations, i, and all block iterations, j, the carry in
348        // summary, CI_i,j, matches the carry out summary of the prior block iteration, CO_i,j - 1.
349        // Otherwise we will end up with an incorrect result or being trapped in an infinite loop.
350
351        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
352        Value * capacity = iBuilder->CreateLoad(capacityPtr, false);
353        Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
354        Value * summary = iBuilder->CreateLoad(summaryPtr, false);
355
356        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
357
358        Value * loopSelector = iBuilder->CreateZExt(mLoopSelector, capacity->getType());
359
360        BasicBlock * entry = iBuilder->GetInsertBlock();
361        BasicBlock * update = iBuilder->CreateBasicBlock("UpdateNonCarryCollapsingSummary");
362        BasicBlock * resume = iBuilder->CreateBasicBlock("ResumeAfterUpdatingNonCarryCollapsingSummary");
363
364        iBuilder->CreateBr(update);
365
        // The update block loops on itself; i, borrow, carry and carryInSummary are its loop-carried values.
366        iBuilder->SetInsertPoint(update);
367        PHINode * i = iBuilder->CreatePHI(capacity->getType(), 2);
368        i->addIncoming(ConstantInt::getNullValue(capacity->getType()), entry);
369        PHINode * const borrow = iBuilder->CreatePHI(carryInAccumulator->getType(), 2);
370        borrow->addIncoming(carryInAccumulator, entry);
371        PHINode * const carry = iBuilder->CreatePHI(carryOutAccumulator->getType(), 2);
372        carry->addIncoming(carryOutAccumulator, entry);
373        // OR the updated carry in summary later for the summaryTest
374        PHINode * const carryInSummary = iBuilder->CreatePHI(carryTy, 2);
375        carryInSummary->addIncoming(Constant::getNullValue(carryTy), entry);
376
377        // half subtractor
        // Each step i owns two adjacent summary blocks; the loop selector chooses which of the pair is the
        // carry-in block, and the other one (offset XOR 1, below) is the carry-out block.
378        Value * const carryInOffset = iBuilder->CreateOr(iBuilder->CreateShl(i, 1), loopSelector);
379        Value * const carryInPtr = iBuilder->CreateGEP(summary, carryInOffset);
380        Value * const carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
381        Value * const nextCarryIn = iBuilder->CreateXor(carryIn, borrow);
382        Value * const nextSummary = iBuilder->CreateOr(carryInSummary, nextCarryIn);
383        iBuilder->CreateBlockAlignedStore(nextCarryIn, carryInPtr);
384        carryInSummary->addIncoming(nextSummary, update);
385        Value * finalBorrow = iBuilder->CreateAnd(iBuilder->CreateNot(carryIn), borrow);
386        borrow->addIncoming(finalBorrow, update);
387
388        // half adder
389        Value * const carryOutOffset = iBuilder->CreateXor(carryInOffset, ConstantInt::get(carryInOffset->getType(), 1));
390        Value * const carryOutPtr = iBuilder->CreateGEP(summary, carryOutOffset);
391        Value * const carryOut = iBuilder->CreateBlockAlignedLoad(carryOutPtr);
392        Value * const nextCarryOut = iBuilder->CreateXor(carryOut, carry);
393        iBuilder->CreateBlockAlignedStore(nextCarryOut, carryOutPtr);
394        Value * finalCarry = iBuilder->CreateAnd(carryOut, carry);
395        carry->addIncoming(finalCarry, update);
396
397        // loop condition
        // The test uses the value of i before the increment, so the final step is the one with (1 << i) == capacity.
398        i->addIncoming(iBuilder->CreateAdd(i, ONE), update);
399        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(iBuilder->CreateShl(ONE, i), capacity), update, resume);
400
401        iBuilder->SetInsertPoint(resume);
402
403        if (codegen::EnableAsserts) {
404            iBuilder->CreateAssertZero(iBuilder->CreateOr(finalBorrow, finalCarry),
405                                       "CarryManager: loop post-condition violated: final borrow and carry must be zero!");
406        }
407
        // Complete the iteration counter created in enterLoopBody with its back-edge value (index + 1).
408        assert (!mLoopIndicies.empty());
409        PHINode * index = mLoopIndicies.back();
410        index->addIncoming(iBuilder->CreateAdd(index, iBuilder->getSize(1)), resume);
411        mLoopIndicies.pop_back();
412
413        mNextSummaryTest = nextSummary;
414    }
415    if (mCarryInfo->hasSummary()) {
416        const auto n = mCarrySummaryStack.size(); assert (n > 1);
        // Pop the body's summary and feed it to the phi that enterLoopBody left one entry below it.
417        Value * carryOut = mCarrySummaryStack.back();
418        mCarrySummaryStack.pop_back();
419        PHINode * phiCarryOut = cast<PHINode>(mCarrySummaryStack.back());
420        phiCarryOut->addIncoming(carryOut, iBuilder->GetInsertBlock());
421        // If we're returning to the base scope, reset our accumulated summary value.
422        if (n == 2) {
423            carryOut = Constant::getNullValue(carryTy);
424        }
425        mCarrySummaryStack.back() = carryOut;
426    }
427}
428
429/** ------------------------------------------------------------------------------------------------------------- *
430 * @brief leaveLoopScope
431 ** ------------------------------------------------------------------------------------------------------------- */
432void CarryManager::leaveLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
433    assert (mLoopDepth > 0);
434    --mLoopDepth;
435    leaveScope(iBuilder);
436}
437
438/** ------------------------------------------------------------------------------------------------------------- *
439 * @brief enterIfScope
440 ** ------------------------------------------------------------------------------------------------------------- */
441void CarryManager::enterIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
442    ++mIfDepth;
443    enterScope(iBuilder, scope);
444    // We zero-initialized the nested summary value and later OR in the current summary into the escaping summary
445    // so that upon processing the subsequent block iteration, we branch into this If scope iff a carry out was
446    // generated by a statement within this If scope and not by a dominating statement in the outer scope.
447    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
448        assert (mCurrentFrameIndex == 0);
449        mNextSummaryTest = readCarryInSummary(iBuilder, iBuilder->getInt32(0));
450        if (mCarryInfo->hasExplicitSummary()) {
451            mCurrentFrameIndex = 1;
452        }
453    }
454    Type * const carryTy = iBuilder->getBitBlockType();
455    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
456}
457
458/** ------------------------------------------------------------------------------------------------------------- *
459 * @brief generateSummaryTest
460 ** ------------------------------------------------------------------------------------------------------------- */
461Value * CarryManager::generateSummaryTest(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * condition) {
462    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
463        assert ("summary test was not generated" && mNextSummaryTest);
464        condition = iBuilder->simd_or(condition, mNextSummaryTest);
465        mNextSummaryTest = nullptr;
466    }
467    assert ("summary test was not consumed" && (mNextSummaryTest == nullptr));
468    return condition;
469}
470
471/** ------------------------------------------------------------------------------------------------------------- *
472 * @brief enterIfBody
473 ** ------------------------------------------------------------------------------------------------------------- */
474void CarryManager::enterIfBody(const std::unique_ptr<kernel::KernelBuilder> & /* iBuilder */, BasicBlock * const entryBlock) {
475    assert (entryBlock);
476}
477
478/** ------------------------------------------------------------------------------------------------------------- *
479 * @brief leaveIfBody
480 ** ------------------------------------------------------------------------------------------------------------- */
481void CarryManager::leaveIfBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const exitBlock) {
482    assert (exitBlock);
483    const auto n = mCarrySummaryStack.size();
484    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
485        writeCarryOutSummary(iBuilder, mCarrySummaryStack[n - 1], iBuilder->getInt32(0));
486    }
487    if (n > 2) {
488        mCarrySummaryStack[n - 1] = iBuilder->CreateOr(mCarrySummaryStack[n - 1], mCarrySummaryStack[n - 2], "summary");
489    }
490}
491
492/** ------------------------------------------------------------------------------------------------------------- *
493 * @brief leaveIfScope
494 ** ------------------------------------------------------------------------------------------------------------- */
495void CarryManager::leaveIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
496    assert (mIfDepth > 0);
497    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
498        const auto n = mCarrySummaryStack.size(); assert (n > 0);
499        if (n > 2) {
500            // When leaving a nested If scope with a summary value, phi out the summary to ensure the
501            // appropriate summary is stored in the outer scope.
502            Value * nested = mCarrySummaryStack[n - 1];
503            Value * outer = mCarrySummaryStack[n - 2];
504            assert (nested->getType() == outer->getType());
505            PHINode * const phi = iBuilder->CreatePHI(nested->getType(), 2, "summary");
506            phi->addIncoming(outer, entryBlock);
507            phi->addIncoming(nested, exitBlock);
508            mCarrySummaryStack[n - 2] = phi;
509        }
510    }
511    --mIfDepth;
512    leaveScope(iBuilder);
513    mCarrySummaryStack.pop_back();
514}
515
516/** ------------------------------------------------------------------------------------------------------------ *
517 * @brief enterScope
518 ** ------------------------------------------------------------------------------------------------------------- */
519void CarryManager::enterScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
520    assert (scope);
521    // Store the state of the current frame and update the scope state
522    mCarryFrameStack.emplace_back(mCurrentFrame, mCurrentFrameIndex + 1);
523    mCurrentScope = scope;
524    mCarryScopeIndex.push_back(++mCarryScopes);
525    mCarryInfo = &mCarryMetadata[mCarryScopes];
526    // Check whether we're still within our struct bounds; if this fails, either the Pablo program changed during
527    // compilation or a memory corruption has occured.
528    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
529    mCurrentFrame = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
530    // Verify we're pointing to a carry frame struct
531    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
532    mCurrentFrameIndex = 0;
533}
534
535/** ------------------------------------------------------------------------------------------------------------- *
536 * @brief leaveScope
537 ** ------------------------------------------------------------------------------------------------------------- */
538void CarryManager::leaveScope(const std::unique_ptr<kernel::KernelBuilder> & /* iBuilder */) {
539
540    // Did we use all of the packs in this carry struct?
541    assert (mCurrentFrameIndex == mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
542    // Sanity test: are there remaining carry frames?
543    assert (!mCarryFrameStack.empty());
544
545    std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
546
547    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
548
549    mCarryFrameStack.pop_back();
550    mCarryScopeIndex.pop_back();
551    assert (!mCarryScopeIndex.empty());
552    mCurrentScope = mCurrentScope->getPredecessor();
553    assert (mCurrentScope);
554    mCarryInfo = &mCarryMetadata[mCarryScopeIndex.back()];
555}
556
557/** ------------------------------------------------------------------------------------------------------------- *
558 * @brief addCarryInCarryOut
559 ** ------------------------------------------------------------------------------------------------------------- */
560Value * CarryManager::addCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Statement * const operation, Value * const e1, Value * const e2) {
561    assert (operation && (isNonAdvanceCarryGeneratingStatement(operation)));
562    Value * const carryIn = getNextCarryIn(iBuilder);
563    Value * carryOut, * result;
564    std::tie(carryOut, result) = iBuilder->bitblock_add_with_carry(e1, e2, carryIn);
565    setNextCarryOut(iBuilder, carryOut);
566    assert (result->getType() == iBuilder->getBitBlockType());
567    return result;
568}
569
570/** ------------------------------------------------------------------------------------------------------------- *
571 * @brief advanceCarryInCarryOut
572 ** ------------------------------------------------------------------------------------------------------------- */
573Value * CarryManager::advanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Advance * const advance, Value * const value) {
574    const auto shiftAmount = advance->getAmount();
575    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
576        Value * const carryIn = getNextCarryIn(iBuilder);
577        Value * carryOut, * result;
578        std::tie(carryOut, result) = iBuilder->bitblock_advance(value, carryIn, shiftAmount);
579        setNextCarryOut(iBuilder, carryOut);
580        assert (result->getType() == iBuilder->getBitBlockType());
581        return result;
582    } else {
583        return longAdvanceCarryInCarryOut(iBuilder, value, shiftAmount);
584    }
585}
586
587/** ------------------------------------------------------------------------------------------------------------- *
588 * @brief longAdvanceCarryInCarryOut
589 ** ------------------------------------------------------------------------------------------------------------- */
590inline Value * CarryManager::longAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const value, const unsigned shiftAmount) {
591
592    assert (mHasLongAdvance);
593    assert (shiftAmount >= LONG_ADVANCE_BREAKPOINT);
594
595    Type * const streamTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
596
597    if (mIfDepth > 0) {
598        if (shiftAmount > iBuilder->getBitBlockWidth()) {
599            const auto frameIndex = mCurrentFrameIndex++;
600            Value * carry = iBuilder->CreateZExt(iBuilder->bitblock_any(value), streamTy);
601            const unsigned summarySize = ceil_udiv(shiftAmount, iBuilder->getBitBlockWidth() * iBuilder->getBitBlockWidth());
602            for (unsigned i = 0;;++i) {
603                Value * const ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(frameIndex), iBuilder->getInt32(i)});
604                Value * const prior = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(ptr), streamTy);
605                Value * const stream = iBuilder->CreateBitCast(iBuilder->CreateOr(iBuilder->CreateShl(prior, 1), carry), iBuilder->getBitBlockType());
606                if (LLVM_LIKELY(i == summarySize)) {
607                    Value * const maskedStream = iBuilder->CreateAnd(stream, iBuilder->bitblock_mask_from(iBuilder->getInt32(summarySize % iBuilder->getBitBlockWidth())));
608                    addToCarryOutSummary(iBuilder, maskedStream);
609                    iBuilder->CreateBlockAlignedStore(maskedStream, ptr);
610                    break;
611                }
612                addToCarryOutSummary(iBuilder, stream);
613                iBuilder->CreateBlockAlignedStore(stream, ptr);
614                carry = iBuilder->CreateLShr(prior, iBuilder->getBitBlockWidth() - 1);
615            }
616        } else if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
617            addToCarryOutSummary(iBuilder, value);
618        }
619    }
620    const auto frameIndex = mCurrentFrameIndex++;
621    // special case using a single buffer entry and the carry_out value.
622    if (LLVM_LIKELY((shiftAmount < iBuilder->getBitBlockWidth()) && (mLoopDepth == 0))) {
623        Value * const buffer = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(frameIndex), iBuilder->getInt32(0)});
624        assert (buffer->getType()->getPointerElementType() == iBuilder->getBitBlockType());
625        Value * carryIn = iBuilder->CreateBlockAlignedLoad(buffer);
626        iBuilder->CreateBlockAlignedStore(value, buffer);
627        /* Very special case - no combine */
628        if (LLVM_UNLIKELY(shiftAmount == iBuilder->getBitBlockWidth())) {
629            return iBuilder->CreateBitCast(carryIn, iBuilder->getBitBlockType());
630        }
631        Value* block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamTy), iBuilder->getBitBlockWidth() - shiftAmount);
632        Value* block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(value, streamTy), shiftAmount);
633        return iBuilder->CreateBitCast(iBuilder->CreateOr(block1_shl, block0_shr), iBuilder->getBitBlockType());
634    } else { //
635        const unsigned blockShift = shiftAmount % iBuilder->getBitBlockWidth();
636        const unsigned blocks = ceil_udiv(shiftAmount, iBuilder->getBitBlockWidth());
637        // Create a mask to implement circular buffer indexing
638        Value * indexMask = iBuilder->getSize(nearest_pow2(blocks + ((mLoopDepth != 0) ? 1 : 0)) - 1);
639        Value * blockIndex = iBuilder->getScalarField("CarryBlockIndex");
640        Value * carryIndex0 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(blocks));
641        Value * loadIndex0 = iBuilder->CreateAnd(carryIndex0, indexMask);
642        Value * const carryInPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(frameIndex), loadIndex0});
643        Value * carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
644
645        Value * storeIndex = iBuilder->CreateAnd(blockIndex, indexMask);
646        Value * const carryOutPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(frameIndex), storeIndex});
647        assert (carryIn->getType() == iBuilder->getBitBlockType());
648
649        // If the long advance is an exact multiple of BitBlockWidth, we simply return the oldest
650        // block in the long advance carry data area.
651        if (LLVM_UNLIKELY(blockShift == 0)) {
652            iBuilder->CreateBlockAlignedStore(value, carryOutPtr);
653            return carryIn;
654        } else { // Otherwise we need to combine data from the two oldest blocks.
655            Value * carryIndex1 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(blocks - 1));
656            Value * loadIndex1 = iBuilder->CreateAnd(carryIndex1, indexMask);
657            Value * const carryInPtr2 = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(frameIndex), loadIndex1});
658            Value * carry_block1 = iBuilder->CreateBlockAlignedLoad(carryInPtr2);
659            Value * block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamTy), iBuilder->getBitBlockWidth() - blockShift);
660            Value * block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(carry_block1, streamTy), blockShift);
661            iBuilder->CreateBlockAlignedStore(value, carryOutPtr);
662            return iBuilder->CreateBitCast(iBuilder->CreateOr(block1_shl, block0_shr), iBuilder->getBitBlockType());
663        }
664    }
665}
666
667/** ------------------------------------------------------------------------------------------------------------- *
668 * @brief getNextCarryIn
669 ** ------------------------------------------------------------------------------------------------------------- */
670Value * CarryManager::getNextCarryIn(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
671    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
672    if (mLoopDepth == 0) {
673        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
674    } else {
675        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mLoopSelector});
676    }
677    Type * const carryTy = iBuilder->getBitBlockType();
678    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
679    Value * const carryIn = iBuilder->CreateBlockAlignedLoad(mCarryPackPtr);
680    if (mLoopDepth > 0) {
681        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(carryTy), mCarryPackPtr);
682    }
683    return carryIn;
684}
685
686/** ------------------------------------------------------------------------------------------------------------- *
687 * @brief setNextCarryOut
688 ** ------------------------------------------------------------------------------------------------------------- */
689void CarryManager::setNextCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * carryOut) {
690    Type * const carryTy = iBuilder->getBitBlockType();
691    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
692    carryOut = iBuilder->CreateBitCast(carryOut, carryTy);
693    if (mCarryInfo->hasSummary()) {
694        addToCarryOutSummary(iBuilder, carryOut);
695    }
696    if (mLoopDepth != 0) {
697        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mNextLoopSelector});
698        if (LLVM_LIKELY(!mCarryInfo->nonCarryCollapsingMode())) {
699            Value * accum = iBuilder->CreateBlockAlignedLoad(mCarryPackPtr);
700            carryOut = iBuilder->CreateOr(carryOut, accum);
701        }
702    }
703    ++mCurrentFrameIndex;
704    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
705    iBuilder->CreateBlockAlignedStore(carryOut, mCarryPackPtr);
706}
707
708/** ------------------------------------------------------------------------------------------------------------- *
709 * @brief readCarryInSummary
710 ** ------------------------------------------------------------------------------------------------------------- */
711Value * CarryManager::readCarryInSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, ConstantInt * index) const {
712    assert (mCarryInfo->hasSummary());
713    unsigned count = 2;
714    if (LLVM_UNLIKELY(mCarryInfo->hasBorrowedSummary())) {
715        Type * frameTy = mCurrentFrame->getType()->getPointerElementType();
716        count = 1;
717        while (frameTy->isStructTy()) {
718            ++count;
719            frameTy = frameTy->getStructElementType(0);
720        }
721    }
722    const unsigned length = (mLoopDepth == 0) ? count : (count + 1);
723    Value * indicies[length];
724    std::fill(indicies, indicies + count - 1, iBuilder->getInt32(0));
725    indicies[count - 1] = index;
726    if (mLoopDepth != 0) {
727        indicies[count] = mLoopSelector;
728    }
729
730    ArrayRef<Value *> ar(indicies, length);
731    Value * const ptr = iBuilder->CreateGEP(mCurrentFrame, ar);
732    Value * const summary = iBuilder->CreateBlockAlignedLoad(ptr);
733    if (mLoopDepth != 0 && mCarryInfo->hasExplicitSummary()) {
734        Type * const carryTy = iBuilder->getBitBlockType();
735        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(carryTy), ptr);
736    }
737    return summary;
738}
739
740/** ------------------------------------------------------------------------------------------------------------- *
741 * @brief writeCarryOutSummary
742 ** ------------------------------------------------------------------------------------------------------------- */
743inline void CarryManager::writeCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const summary, ConstantInt * index) const {
744    Value * ptr = nullptr;
745    assert (mCarryInfo->hasExplicitSummary());
746    if (mLoopDepth > 0) {
747        ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index, mNextLoopSelector});
748    } else {
749        ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index});
750    }
751    iBuilder->CreateBlockAlignedStore(summary, ptr);
752}
753
754/** ------------------------------------------------------------------------------------------------------------- *
755 * @brief addToCarryOutSummary
756 ** ------------------------------------------------------------------------------------------------------------- */
757inline void CarryManager::addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const value) {
758    assert ("cannot add null summary value!" && value);   
759    assert ("summary stack is empty!" && !mCarrySummaryStack.empty());
760    assert (mCarryInfo->hasSummary());
761    mCarrySummaryStack.back() = iBuilder->CreateOr(value, mCarrySummaryStack.back());
762}
763
764/** ------------------------------------------------------------------------------------------------------------- *
765 * @brief enumerate
766 ** ------------------------------------------------------------------------------------------------------------- */
767unsigned CarryManager::getScopeCount(const PabloBlock * const scope, unsigned index) {
768    for (const Statement * stmt : *scope) {
769        if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
770            index = getScopeCount(cast<Branch>(stmt)->getBody(), index);
771        }
772    }
773    return index + 1;
774}
775
776/** ------------------------------------------------------------------------------------------------------------- *
777 * @brief hasIterationSpecificAssignment
778 ** ------------------------------------------------------------------------------------------------------------- */
779bool CarryManager::hasIterationSpecificAssignment(const PabloBlock * const scope) {
780#if 0
781    return dyn_cast_or_null<While>(scope->getBranch()) != nullptr;
782#else
783    if (const While * const br = dyn_cast_or_null<While>(scope->getBranch())) {
784        for (const Var * var : br->getEscaped()) {
785            for (const PabloAST * user : var->users()) {
786                if (const Extract * e = dyn_cast<Extract>(user)) {
787                    if (LLVM_UNLIKELY(e->getIndex() == var)) {
788                        // If we assign this Var a value and read the value as the index parameter
789                        // of a nested Extract statement, then we cannot collapse the carries.
790                        const PabloBlock * parent = e->getParent();
791                        for (;;) {
792                            if (parent == scope) {
793                                return true;
794                            }
795                            parent = parent->getPredecessor();
796                            if (parent == nullptr) {
797                                break;
798                            }
799                        }
800                    }
801                }
802            }
803        }
804    }
805    return false;
806#endif
807}
808
809/** ------------------------------------------------------------------------------------------------------------- *
810 * @brief analyse
811 ** ------------------------------------------------------------------------------------------------------------- */
812StructType * CarryManager::analyse(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope,
813                                   const unsigned ifDepth, const unsigned loopDepth, const bool isNestedWithinNonCarryCollapsingLoop) {
814    assert ("scope cannot be null!" && scope);
815    assert ("entry scope (and only the entry scope) must be in scope 0"
816            && (mCarryScopes == 0 ? (scope == mKernel->getEntryBlock()) : (scope != mKernel->getEntryBlock())));
817    assert (mCarryScopes < mCarryMetadata.size());
818    Type * const carryTy = iBuilder->getBitBlockType();
819    Type * const blockTy = iBuilder->getBitBlockType();
820
821    const unsigned carryScopeIndex = mCarryScopes++;
822    const bool nonCarryCollapsingMode = hasIterationSpecificAssignment(scope);
823    Type * const carryPackType = (loopDepth == 0) ? carryTy : ArrayType::get(carryTy, 2);
824    std::vector<Type *> state;
825
826    for (const Statement * stmt : *scope) {
827        if (LLVM_UNLIKELY(isa<Advance>(stmt))) {
828            const auto amount = cast<Advance>(stmt)->getAmount();
829            Type * type = carryPackType;
830            if (LLVM_UNLIKELY(amount >= LONG_ADVANCE_BREAKPOINT)) {
831                const unsigned blocks = ceil_udiv(amount, iBuilder->getBitBlockWidth());
832                type = ArrayType::get(blockTy, nearest_pow2(blocks + ((loopDepth != 0) ? 1 : 0)));
833                if (LLVM_UNLIKELY(ifDepth > 0 && amount > iBuilder->getBitBlockWidth())) {
834                    // 1 bit will mark the presense of any bit in each block.
835                    Type * carryType = ArrayType::get(blockTy, ceil_udiv(amount, std::pow(iBuilder->getBitBlockWidth(), 2)));
836                    state.push_back(carryType);
837                }
838                mHasLongAdvance = true;               
839            }
840            state.push_back(type);
841        } else if (LLVM_UNLIKELY(isNonAdvanceCarryGeneratingStatement(stmt))) {
842            state.push_back(carryPackType);
843        } else if (LLVM_UNLIKELY(isa<If>(stmt))) {
844            state.push_back(analyse(iBuilder, cast<If>(stmt)->getBody(), ifDepth + 1, loopDepth, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
845        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
846            mHasLoop = true;
847            state.push_back(analyse(iBuilder, cast<While>(stmt)->getBody(), ifDepth, loopDepth + 1, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
848        }
849    }
850    // Build the carry state struct and add the summary pack if needed.
851    CarryData & cd = mCarryMetadata[carryScopeIndex];
852    StructType * carryState = nullptr;
853    CarryData::SummaryType summaryType = CarryData::NoSummary;
854    if (LLVM_UNLIKELY(state.empty())) {
855        carryState = StructType::get(iBuilder->getContext());
856    } else {
857        if (dyn_cast_or_null<If>(scope->getBranch()) || nonCarryCollapsingMode || isNestedWithinNonCarryCollapsingLoop) {
858            if (LLVM_LIKELY(state.size() > 1)) {
859                summaryType = CarryData::ExplicitSummary;
860                // NOTE: summaries are stored differently depending whether we're entering an If or While branch. With an If branch, they
861                // preceed the carry state data and with a While loop they succeed it. This is to help cache prefectching performance.
862                state.insert(isa<If>(scope->getBranch()) ? state.begin() : state.end(), carryPackType);
863            } else {
864                summaryType = CarryData::ImplicitSummary;
865                if (state[0]->isStructTy()) {
866                    summaryType = CarryData::BorrowedSummary;
867                }
868            }           
869        }
870        carryState = StructType::get(iBuilder->getContext(), state);
871        // If we're in a loop and cannot use collapsing carry mode, convert the carry state struct into a capacity,
872        // carry state pointer, and summary pointer struct.
873        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
874            mHasNonCarryCollapsingLoops = true;
875            carryState = StructType::get(iBuilder->getSizeTy(), carryState->getPointerTo(), carryTy->getPointerTo(), nullptr);
876            assert (isDynamicallyAllocatedType(carryState));
877        }
878        cd.setNonCollapsingCarryMode(nonCarryCollapsingMode);
879    }
880    cd.setSummaryType(summaryType);
881    return carryState;
882}
883
884/** ------------------------------------------------------------------------------------------------------------- *
885 * @brief constructor
886 ** ------------------------------------------------------------------------------------------------------------- */
887CarryManager::CarryManager() noexcept
888: mKernel(nullptr)
889, mCurrentFrame(nullptr)
890, mCurrentFrameIndex(0)
891, mCurrentScope(nullptr)
892, mCarryInfo(nullptr)
893, mNextSummaryTest(nullptr)
894, mIfDepth(0)
895, mHasLongAdvance(false)
896, mHasNonCarryCollapsingLoops(false)
897, mHasLoop(false)
898, mLoopDepth(0)
899, mLoopSelector(nullptr)
900, mNextLoopSelector(nullptr)
901, mCarryPackPtr(nullptr)
902, mCarryScopes(0) {
903
904}
905
906}
Note: See TracBrowser for help on using the repository browser.