source: icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp @ 5705

Last change on this file since 5705 was 5705, checked in by cameron, 21 months ago

Drop linebreak normalization; add1 attribute for grep kernel; pablo indexed advance initial check-in

File size: 51.5 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "carry_manager.h"
8#include <pablo/carry_data.h>
9#include <pablo/codegenstate.h>
10#include <llvm/IR/BasicBlock.h>
11#include <llvm/IR/DerivedTypes.h>
12#include <llvm/Transforms/Utils/Local.h>
13#include <pablo/branch.h>
14#include <pablo/pe_advance.h>
15#include <pablo/pe_scanthru.h>
16#include <pablo/pe_matchstar.h>
17#include <pablo/pe_var.h>
18#include <kernels/kernel_builder.h>
19#include <toolchain/toolchain.h>
20#include <array>
21
22using namespace llvm;
23
24namespace pablo {
25
26inline static bool is_power_2(const unsigned n) {
27    return (n && ((n & (n - 1)) == 0));
28}
29
30inline static unsigned ceil_log2(const unsigned v) {
31    assert ("log2(0) is undefined!" && v != 0);
32    return (sizeof(unsigned) * CHAR_BIT) - __builtin_clz(v - 1U);
33}
34
35inline static unsigned floor_log2(const unsigned v) {
36    assert ("log2(0) is undefined!" && v != 0);
37    return ((sizeof(unsigned) * CHAR_BIT) - 1U) - __builtin_clz(v);
38}
39
40inline static unsigned nearest_pow2(const unsigned v) {
41    assert (v > 0 && v < (UINT32_MAX / 2));
42    return (v < 2) ? 1 : (1U << ceil_log2(v));
43}
44
45inline static unsigned nearest_multiple(const unsigned n, const unsigned m) {
46    assert (is_power_2(m));
47    const unsigned r = (n + m - 1U) & -m;
48    assert (r >= n);
49    return r;
50}
51
52inline static bool is_multiple_of(const unsigned n, const unsigned m) {
53    return nearest_multiple(n, m) == n;
54}
55
56inline static unsigned udiv(const unsigned x, const unsigned y) {
57    assert (is_power_2(y));
58    const unsigned z = x >> floor_log2(y);
59    assert (z == (x / y));
60    return z;
61}
62
63inline static unsigned ceil_udiv(const unsigned x, const unsigned y) {
64    return (((x - 1) | (y - 1)) + 1) / y;
65}
66
67using TypeId = PabloAST::ClassTypeId;
68
69inline static bool isNonAdvanceCarryGeneratingStatement(const Statement * const stmt) {
70    return isa<CarryProducingStatement>(stmt) && !isa<Advance>(stmt) && !isa<IndexedAdvance>(stmt);
71}
72
73#define LONG_ADVANCE_BREAKPOINT 64
74
75/** ------------------------------------------------------------------------------------------------------------- *
76 * @brief initializeCarryData
77 ** ------------------------------------------------------------------------------------------------------------- */
78void CarryManager::initializeCarryData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, PabloKernel * const kernel) {
79
80    // Each scope constructs its own CarryData struct, which will be added to the final "carries" struct
81    // that is added to the Kernel. The scope index will indicate which struct to access.
82
83    // A CarryData struct either contains an array of CarryPackBlocks or an integer indicating the capacity of
84    // the variable length CarryData struct and pointer. A variable length CarryData struct is required whenever
85    // the streams accessed by a loop could vary between iterations. When resizing a CarryData struct for a
86    // particular loop, the current loop struct and all nested structs need to be resized. This accommodates
87    // the fact every pablo While loop must be executed at least once.
88
89    // A nested loop may also contain a variable length CarryData struct
90
91    // To determine whether we require a variable length CarryData struct, we test the escaped variables of
92    // each loop branch to see whether they are used as the index parameter of a nested Extract statement.
93    // Any scope that requires variable length CarryData, requires that all nested branches have a unique
94    // set of carries for that iteration.
95
96    assert (mKernel == nullptr);
97    mCurrentScope = kernel->getEntryBlock();
98    mKernel = kernel;
99
100    mCarryScopes = 0;
101
102    mCarryMetadata.resize(getScopeCount(mCurrentScope));
103
104    Type * const carryStateTy = analyse(iBuilder, mCurrentScope);
105
106    kernel->addScalar(carryStateTy, "carries");
107
108    if (mHasLoop) {
109        kernel->addScalar(iBuilder->getInt32Ty(), "selector");
110    }
111    if (mHasLongAdvance) {
112        kernel->addScalar(iBuilder->getSizeTy(), "CarryBlockIndex");
113    }
114}
115
116bool isDynamicallyAllocatedType(const Type * const ty) {
117    if (isa<StructType>(ty) && ty->getStructNumElements() == 3) {
118        return (ty->getStructElementType(1)->isPointerTy() && ty->getStructElementType(2)->isPointerTy() && ty->getStructElementType(0)->isIntegerTy());
119    }
120    return false;
121}
122
123bool containsDynamicallyAllocatedType(const Type * const ty) {
124    if (isa<StructType>(ty)) {
125        for (unsigned i = 0; i < ty->getStructNumElements(); ++i) {
126            if (isDynamicallyAllocatedType(ty->getStructElementType(i))) {
127                return true;
128            }
129        }
130    }
131    return false;
132}
133
134void freeDynamicallyAllocatedMemory(const std::unique_ptr<kernel::KernelBuilder> & idb, Value * const frame) {
135    StructType * const ty = cast<StructType>(frame->getType()->getPointerElementType());
136    std::array<Value *, 3> indices;
137    indices[0] = idb->getInt32(0);
138    for (unsigned i = 0; i < ty->getStructNumElements(); ++i) {
139        if (isDynamicallyAllocatedType(ty->getStructElementType(i))) {
140            indices[1] = idb->getInt32(i);
141            indices[2] = idb->getInt32(1);
142            Value * const innerFrame = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
143            if (containsDynamicallyAllocatedType(innerFrame->getType())) {
144                indices[2] = indices[0];
145                Value *  const count = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
146                BasicBlock * const entry = idb->GetInsertBlock();
147                BasicBlock * const cond = idb->CreateBasicBlock("freeCarryDataCond");
148                BasicBlock * const body = idb->CreateBasicBlock("freeCarryDataLoop");
149                BasicBlock * const exit = idb->CreateBasicBlock("freeCarryDataExit");
150                idb->CreateBr(cond);
151                idb->SetInsertPoint(cond);
152                PHINode * const index = idb->CreatePHI(count->getType(), 2);
153                index->addIncoming(ConstantInt::getNullValue(count->getType()), entry);
154                Value * test = idb->CreateICmpNE(index, count);
155                idb->CreateCondBr(test, body, exit);
156                idb->SetInsertPoint(body);
157                freeDynamicallyAllocatedMemory(idb, idb->CreateGEP(innerFrame, index));
158                index->addIncoming(idb->CreateAdd(index, ConstantInt::get(count->getType(), 1)), body);
159                idb->CreateBr(cond);
160                idb->SetInsertPoint(exit);
161            }
162            idb->CreateFree(innerFrame);
163            indices[2] = idb->getInt32(2);
164            Value *  const summary = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
165            idb->CreateFree(summary);
166        }
167    }
168}
169
170/** ------------------------------------------------------------------------------------------------------------- *
171 * @brief releaseCarryData
172 ** ------------------------------------------------------------------------------------------------------------- */
173void CarryManager::releaseCarryData(const std::unique_ptr<kernel::KernelBuilder> & idb) {
174    if (mHasNonCarryCollapsingLoops) {
175        freeDynamicallyAllocatedMemory(idb, idb->getScalarFieldPtr("carries"));
176    }
177}
178
179/** ------------------------------------------------------------------------------------------------------------- *
180 * @brief initializeCodeGen
181 ** ------------------------------------------------------------------------------------------------------------- */
182void CarryManager::initializeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
183
184    assert(!mCarryMetadata.empty());
185    mCarryInfo = &mCarryMetadata[0];
186    assert (!mCarryInfo->hasSummary());
187
188    mCurrentFrame = iBuilder->getScalarFieldPtr("carries");
189    mCurrentFrameIndex = 0;
190    mCarryScopes = 0;
191    mCarryScopeIndex.push_back(0);
192
193    assert (mCarryFrameStack.empty());
194
195    assert (mCarrySummaryStack.empty());
196
197    Type * const carryTy = iBuilder->getBitBlockType();
198
199    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
200
201    if (mHasLoop) {       
202        mLoopSelector = iBuilder->getScalarField("selector");
203        mNextLoopSelector = iBuilder->CreateXor(mLoopSelector, ConstantInt::get(mLoopSelector->getType(), 1));
204    }
205
206}
207
208/** ------------------------------------------------------------------------------------------------------------- *
209 * @brief finalizeCodeGen
210 ** ------------------------------------------------------------------------------------------------------------- */
211void CarryManager::finalizeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
212    if (mHasLoop) {
213        iBuilder->setScalarField("selector", mNextLoopSelector);
214    }
215    if (mHasLongAdvance) {
216        Value * idx = iBuilder->getScalarField("CarryBlockIndex");
217        idx = iBuilder->CreateAdd(idx, iBuilder->getSize(1));
218        iBuilder->setScalarField("CarryBlockIndex", idx);
219    }
220    assert (mCarryFrameStack.empty());   
221    assert ("base summary value was deleted!" && mCarrySummaryStack.size() == 1);
222    assert ("base summary value was overwritten with non-zero value!" && isa<Constant>(mCarrySummaryStack[0]) && cast<Constant>(mCarrySummaryStack[0])->isNullValue());
223    mCarrySummaryStack.clear();
224    assert (mCarryScopeIndex.size() == 1);
225    mCarryScopeIndex.clear();
226}
227
228/** ------------------------------------------------------------------------------------------------------------- *
229 * @brief enterLoopScope
230 ** ------------------------------------------------------------------------------------------------------------- */
231void CarryManager::enterLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
232    assert (scope);
233    assert (mHasLoop);
234    ++mLoopDepth;
235    enterScope(iBuilder, scope);
236}
237
238/** ------------------------------------------------------------------------------------------------------------- *
239 * @brief enterLoopBody
240 ** ------------------------------------------------------------------------------------------------------------- */
241void CarryManager::enterLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const entryBlock) {
242    if (mCarryInfo->hasSummary()) {
243        Type * const carryTy = iBuilder->getBitBlockType();
244        PHINode * phiCarryOutSummary = iBuilder->CreatePHI(carryTy, 2, "summary");
245        assert (!mCarrySummaryStack.empty());
246        phiCarryOutSummary->addIncoming(mCarrySummaryStack.back(), entryBlock);
247        // Replace the incoming carry summary with the phi node and add the phi node to the stack  so that we can
248        // properly OR it into the outgoing summary value.
249        // NOTE: this may change the base summary value; when exiting to the base scope, replace this summary with
250        // a null value to prevent subsequent nested scopes from inheriting the summary of this scope.
251        mCarrySummaryStack.back() = phiCarryOutSummary;
252        mCarrySummaryStack.push_back(phiCarryOutSummary);
253    }
254    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
255
256        assert (mCarryInfo->hasSummary());
257
258        Type * const int8PtrTy = iBuilder->getInt8PtrTy();
259        Type * const carryTy = iBuilder->getBitBlockType();
260        PointerType * const carryPtrTy = carryTy->getPointerTo();
261
262        // Check whether we need to resize the carry state
263        PHINode * index = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
264        mLoopIndicies.push_back(index);
265        index->addIncoming(iBuilder->getSize(0), entryBlock);
266        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
267        Value * capacity = iBuilder->CreateLoad(capacityPtr, "capacity");
268        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
269        Value * arrayPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
270        Value * array = iBuilder->CreateLoad(arrayPtr, "array");
271        BasicBlock * const entry = iBuilder->GetInsertBlock();
272        BasicBlock * const resizeCarryState = iBuilder->CreateBasicBlock("ResizeCarryState");
273        BasicBlock * const reallocExisting = iBuilder->CreateBasicBlock("ReallocExisting");
274        BasicBlock * const createNew = iBuilder->CreateBasicBlock("CreateNew");
275        BasicBlock * const resumeKernel = iBuilder->CreateBasicBlock("ResumeKernel");
276        iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(index, capacity), resumeKernel, resizeCarryState);
277
278        // RESIZE CARRY BLOCK
279        iBuilder->SetInsertPoint(resizeCarryState);
280        const auto BlockWidth = iBuilder->getBitBlockWidth() / 8;
281        const auto Log2BlockWidth = floor_log2(BlockWidth);
282        Constant * const carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(array->getType()->getPointerElementType()), iBuilder->getSizeTy(), false);
283        Value * const summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
284        Value * const hasCarryState = iBuilder->CreateICmpNE(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
285        iBuilder->CreateLikelyCondBr(hasCarryState, reallocExisting, createNew);
286
287        // REALLOCATE EXISTING
288        iBuilder->SetInsertPoint(reallocExisting);
289        Value * const capacitySize = iBuilder->CreateMul(capacity, carryStateWidth);
290        Value * const newCapacitySize = iBuilder->CreateShl(capacitySize, 1); // x 2
291        Value * const newArray = iBuilder->CreateCacheAlignedMalloc(newCapacitySize);
292        iBuilder->CreateMemCpy(newArray, array, capacitySize, iBuilder->getCacheAlignment());
293        iBuilder->CreateFree(array);
294        iBuilder->CreateStore(newArray, arrayPtr);
295        Value * const startNewArrayPtr = iBuilder->CreateGEP(iBuilder->CreatePointerCast(newArray, int8PtrTy), capacitySize);
296        iBuilder->CreateMemZero(startNewArrayPtr, capacitySize, BlockWidth);
297        Value * const newCapacity = iBuilder->CreateShl(capacity, 1);
298        iBuilder->CreateStore(newCapacity, capacityPtr);
299        Value * const summary = iBuilder->CreateLoad(summaryPtr, false);
300        Value * const summarySize = iBuilder->CreateShl(iBuilder->CreateAdd(iBuilder->CreateCeilLog2(capacity), ONE), Log2BlockWidth + 1);
301        Constant * const additionalSpace = iBuilder->getSize(2 * BlockWidth);
302        Value * const newSummarySize = iBuilder->CreateAdd(summarySize, additionalSpace);
303        Value * const newSummary = iBuilder->CreateBlockAlignedMalloc(newSummarySize);
304        iBuilder->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
305        iBuilder->CreateFree(summary);
306        iBuilder->CreateStore(iBuilder->CreatePointerCast(newSummary, carryPtrTy), summaryPtr);
307        Value * const startNewSummaryPtr = iBuilder->CreateGEP(iBuilder->CreatePointerCast(newSummary, int8PtrTy), summarySize);
308        iBuilder->CreateMemZero(startNewSummaryPtr, additionalSpace, BlockWidth);
309        iBuilder->CreateBr(resumeKernel);
310
311        // CREATE NEW
312        iBuilder->SetInsertPoint(createNew);
313        Constant * const initialLog2Capacity = iBuilder->getInt64(4);
314        Constant * const initialCapacity = ConstantExpr::getShl(ONE, initialLog2Capacity);
315        iBuilder->CreateStore(initialCapacity, capacityPtr);
316        Constant * const initialCapacitySize = ConstantExpr::getMul(initialCapacity, carryStateWidth);
317        Value * initialArray = iBuilder->CreateCacheAlignedMalloc(initialCapacitySize);
318        iBuilder->CreateMemZero(initialArray, initialCapacitySize, BlockWidth);
319        initialArray = iBuilder->CreatePointerCast(initialArray, array->getType());
320        iBuilder->CreateStore(initialArray, arrayPtr);
321        Constant * initialSummarySize = ConstantExpr::getShl(ConstantExpr::getAdd(initialLog2Capacity, iBuilder->getInt64(1)), iBuilder->getInt64(Log2BlockWidth + 1));
322        Value * initialSummary = iBuilder->CreateBlockAlignedMalloc(initialSummarySize);
323        iBuilder->CreateMemZero(initialSummary, initialSummarySize, BlockWidth);
324        initialSummary = iBuilder->CreatePointerCast(initialSummary, carryPtrTy);
325        iBuilder->CreateStore(initialSummary, summaryPtr);
326        iBuilder->CreateBr(resumeKernel);
327
328        // RESUME KERNEL
329        iBuilder->SetInsertPoint(resumeKernel);
330        PHINode * phiArrayPtr = iBuilder->CreatePHI(array->getType(), 3);
331        phiArrayPtr->addIncoming(array, entry);
332        phiArrayPtr->addIncoming(initialArray, createNew);
333        phiArrayPtr->addIncoming(newArray, reallocExisting);
334
335        // NOTE: the 3 here is only to pass the assertion later. It refers to the number of elements in the carry data struct.
336        mCarryFrameStack.emplace_back(mCurrentFrame, 3);
337        mCurrentFrame = iBuilder->CreateGEP(phiArrayPtr, index);
338    }
339}
340
341/** ------------------------------------------------------------------------------------------------------------- *
342 * @brief leaveLoopBody
343 ** ------------------------------------------------------------------------------------------------------------- */
344void CarryManager::leaveLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * /* exitBlock */) {
345
346    Type * const carryTy = iBuilder->getBitBlockType();
347
348    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
349
350        assert (mCarryInfo->hasSummary());
351
352        ConstantInt * const summaryIndex = iBuilder->getInt32(mCarryInfo->hasExplicitSummary() ? mCurrentFrameIndex : (mCurrentFrameIndex - 1));
353
354        Value * const carryInAccumulator = readCarryInSummary(iBuilder, summaryIndex);
355        Value * const carryOutAccumulator = mCarrySummaryStack.back();
356
357        if (mCarryInfo->hasExplicitSummary()) {
358            writeCarryOutSummary(iBuilder, carryOutAccumulator, summaryIndex);
359        }
360
361        std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
362        mCarryFrameStack.pop_back();
363
364        // In non-carry-collapsing mode, we cannot rely on the fact that performing a single iteration of this
365        // loop will consume all of the incoming carries from the prior block. We need to subtract the carries
366        // consumed by this iteration from our carry summary state. To do so in parallel, we use the the half-
367        // subtractor circuit to do it in ceil log2 steps. Similarly, we compute our carry out summary state
368        // (for the subsequent block to subtract) using a half-adder circuit.
369
370        // NOTE: this requires that, for all loop iterations, i, and all block iterations, j, the carry in
371        // summary, CI_i,j, matches the carry out summary of the prior block iteration, CO_i,j - 1.
372        // Otherwise we will end up with an incorrect result or being trapped in an infinite loop.
373
374        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
375        Value * capacity = iBuilder->CreateLoad(capacityPtr, false);
376        Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
377        Value * summary = iBuilder->CreateLoad(summaryPtr, false);
378
379        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
380
381        Value * loopSelector = iBuilder->CreateZExt(mLoopSelector, capacity->getType());
382
383        BasicBlock * entry = iBuilder->GetInsertBlock();
384        BasicBlock * update = iBuilder->CreateBasicBlock("UpdateNonCarryCollapsingSummary");
385        BasicBlock * resume = iBuilder->CreateBasicBlock("ResumeAfterUpdatingNonCarryCollapsingSummary");
386
387        iBuilder->CreateBr(update);
388
389        iBuilder->SetInsertPoint(update);
390        PHINode * i = iBuilder->CreatePHI(capacity->getType(), 2);
391        i->addIncoming(ConstantInt::getNullValue(capacity->getType()), entry);
392        PHINode * const borrow = iBuilder->CreatePHI(carryInAccumulator->getType(), 2);
393        borrow->addIncoming(carryInAccumulator, entry);
394        PHINode * const carry = iBuilder->CreatePHI(carryOutAccumulator->getType(), 2);
395        carry->addIncoming(carryOutAccumulator, entry);
396        // OR the updated carry in summary later for the summaryTest
397        PHINode * const carryInSummary = iBuilder->CreatePHI(carryTy, 2);
398        carryInSummary->addIncoming(Constant::getNullValue(carryTy), entry);
399
400        // half subtractor
401        Value * const carryInOffset = iBuilder->CreateOr(iBuilder->CreateShl(i, 1), loopSelector);
402        Value * const carryInPtr = iBuilder->CreateGEP(summary, carryInOffset);
403        Value * const carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
404        Value * const nextCarryIn = iBuilder->CreateXor(carryIn, borrow);
405        Value * const nextSummary = iBuilder->CreateOr(carryInSummary, nextCarryIn);
406
407        iBuilder->CreateBlockAlignedStore(nextCarryIn, carryInPtr);
408        carryInSummary->addIncoming(nextSummary, update);
409        Value * finalBorrow = iBuilder->CreateAnd(iBuilder->CreateNot(carryIn), borrow);
410        borrow->addIncoming(finalBorrow, update);
411
412        // half adder
413        Value * const carryOutOffset = iBuilder->CreateXor(carryInOffset, ConstantInt::get(carryInOffset->getType(), 1));
414        Value * const carryOutPtr = iBuilder->CreateGEP(summary, carryOutOffset);
415        Value * const carryOut = iBuilder->CreateBlockAlignedLoad(carryOutPtr);
416        Value * const nextCarryOut = iBuilder->CreateXor(carryOut, carry);
417
418        iBuilder->CreateBlockAlignedStore(nextCarryOut, carryOutPtr);
419        Value * finalCarry = iBuilder->CreateAnd(carryOut, carry);
420        carry->addIncoming(finalCarry, update);
421
422        // loop condition
423        i->addIncoming(iBuilder->CreateAdd(i, ONE), update);
424        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(iBuilder->CreateShl(ONE, i), capacity), update, resume);
425
426        iBuilder->SetInsertPoint(resume);
427
428        if (codegen::EnableAsserts) {
429            iBuilder->CreateAssertZero(iBuilder->CreateOr(finalBorrow, finalCarry),
430                                       "CarryManager: loop post-condition violated: final borrow and carry must be zero!");
431        }
432
433        assert (!mLoopIndicies.empty());
434        PHINode * index = mLoopIndicies.back();
435        index->addIncoming(iBuilder->CreateAdd(index, iBuilder->getSize(1)), resume);
436        mLoopIndicies.pop_back();
437
438        mNextSummaryTest = nextSummary;
439    }
440    if (mCarryInfo->hasSummary()) {
441        const auto n = mCarrySummaryStack.size(); assert (n > 1);
442        Value * carryOut = mCarrySummaryStack.back();
443        mCarrySummaryStack.pop_back();
444        PHINode * phiCarryOut = cast<PHINode>(mCarrySummaryStack.back());
445        phiCarryOut->addIncoming(carryOut, iBuilder->GetInsertBlock());
446        // If we're returning to the base scope, reset our accumulated summary value.
447        if (n == 2) {
448            carryOut = Constant::getNullValue(carryTy);
449        }
450        mCarrySummaryStack.back() = carryOut;
451    }
452}
453
454/** ------------------------------------------------------------------------------------------------------------- *
455 * @brief leaveLoopScope
456 ** ------------------------------------------------------------------------------------------------------------- */
457void CarryManager::leaveLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
458    assert (mLoopDepth > 0);
459    --mLoopDepth;
460    leaveScope(iBuilder);
461}
462
463/** ------------------------------------------------------------------------------------------------------------- *
464 * @brief enterIfScope
465 ** ------------------------------------------------------------------------------------------------------------- */
466void CarryManager::enterIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
467    ++mIfDepth;
468    enterScope(iBuilder, scope);
469    // We zero-initialized the nested summary value and later OR in the current summary into the escaping summary
470    // so that upon processing the subsequent block iteration, we branch into this If scope iff a carry out was
471    // generated by a statement within this If scope and not by a dominating statement in the outer scope.
472    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
473        assert (mCurrentFrameIndex == 0);
474        mNextSummaryTest = readCarryInSummary(iBuilder, iBuilder->getInt32(0));
475        if (mCarryInfo->hasExplicitSummary()) {
476            mCurrentFrameIndex = 1;
477        }
478    }
479    Type * const carryTy = iBuilder->getBitBlockType();
480    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
481}
482
483/** ------------------------------------------------------------------------------------------------------------- *
484 * @brief generateSummaryTest
485 ** ------------------------------------------------------------------------------------------------------------- */
486Value * CarryManager::generateSummaryTest(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * condition) {
487    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
488        assert ("summary test was not generated" && mNextSummaryTest);
489        condition = iBuilder->simd_or(condition, mNextSummaryTest);
490        mNextSummaryTest = nullptr;
491    }
492    assert ("summary test was not consumed" && (mNextSummaryTest == nullptr));
493    return condition;
494}
495
496/** ------------------------------------------------------------------------------------------------------------- *
497 * @brief enterIfBody
498 ** ------------------------------------------------------------------------------------------------------------- */
499void CarryManager::enterIfBody(const std::unique_ptr<kernel::KernelBuilder> & /* iBuilder */, BasicBlock * const entryBlock) {
500    assert (entryBlock);
501}
502
503/** ------------------------------------------------------------------------------------------------------------- *
504 * @brief leaveIfBody
505 ** ------------------------------------------------------------------------------------------------------------- */
506void CarryManager::leaveIfBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const exitBlock) {
507    assert (exitBlock);
508    const auto n = mCarrySummaryStack.size();
509    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
510        writeCarryOutSummary(iBuilder, mCarrySummaryStack[n - 1], iBuilder->getInt32(0));
511    }
512    if (n > 2) {
513        mCarrySummaryStack[n - 1] = iBuilder->CreateOr(mCarrySummaryStack[n - 1], mCarrySummaryStack[n - 2], "summary");
514    }
515}
516
517/** ------------------------------------------------------------------------------------------------------------- *
518 * @brief leaveIfScope
519 ** ------------------------------------------------------------------------------------------------------------- */
520void CarryManager::leaveIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
521    assert (mIfDepth > 0);
522    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
523        const auto n = mCarrySummaryStack.size(); assert (n > 0);
524        if (n > 2) {
525            // When leaving a nested If scope with a summary value, phi out the summary to ensure the
526            // appropriate summary is stored in the outer scope.
527            Value * nested = mCarrySummaryStack[n - 1];
528            Value * outer = mCarrySummaryStack[n - 2];
529            assert (nested->getType() == outer->getType());
530            PHINode * const phi = iBuilder->CreatePHI(nested->getType(), 2, "summary");
531            phi->addIncoming(outer, entryBlock);
532            phi->addIncoming(nested, exitBlock);
533            mCarrySummaryStack[n - 2] = phi;
534        }
535    }
536    --mIfDepth;
537    leaveScope(iBuilder);
538    mCarrySummaryStack.pop_back();
539}
540
541/** ------------------------------------------------------------------------------------------------------------ *
542 * @brief enterScope
543 ** ------------------------------------------------------------------------------------------------------------- */
544void CarryManager::enterScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
545    assert (scope);
546    // Store the state of the current frame and update the scope state
547    mCarryFrameStack.emplace_back(mCurrentFrame, mCurrentFrameIndex + 1);
548    mCurrentScope = scope;
549    mCarryScopeIndex.push_back(++mCarryScopes);
550    mCarryInfo = &mCarryMetadata[mCarryScopes];
551    // Check whether we're still within our struct bounds; if this fails, either the Pablo program changed during
552    // compilation or a memory corruption has occured.
553    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
554    mCurrentFrame = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
555    // Verify we're pointing to a carry frame struct
556    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
557    mCurrentFrameIndex = 0;
558}
559
560/** ------------------------------------------------------------------------------------------------------------- *
561 * @brief leaveScope
562 ** ------------------------------------------------------------------------------------------------------------- */
563void CarryManager::leaveScope(const std::unique_ptr<kernel::KernelBuilder> & /* iBuilder */) {
564
565    // Did we use all of the packs in this carry struct?
566    assert (mCurrentFrameIndex == mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
567    // Sanity test: are there remaining carry frames?
568    assert (!mCarryFrameStack.empty());
569
570    std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
571
572    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
573
574    mCarryFrameStack.pop_back();
575    mCarryScopeIndex.pop_back();
576    assert (!mCarryScopeIndex.empty());
577    mCurrentScope = mCurrentScope->getPredecessor();
578    assert (mCurrentScope);
579    mCarryInfo = &mCarryMetadata[mCarryScopeIndex.back()];
580}
581
582/** ------------------------------------------------------------------------------------------------------------- *
583 * @brief addCarryInCarryOut
584 ** ------------------------------------------------------------------------------------------------------------- */
585Value * CarryManager::addCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Statement * const operation, Value * const e1, Value * const e2) {
586    assert (operation && (isNonAdvanceCarryGeneratingStatement(operation)));
587    Value * const carryIn = getNextCarryIn(iBuilder);
588    Value * carryOut, * result;
589    std::tie(carryOut, result) = iBuilder->bitblock_add_with_carry(e1, e2, carryIn);
590    setNextCarryOut(iBuilder, carryOut);
591    assert (result->getType() == iBuilder->getBitBlockType());
592    return result;
593}
594
595/** ------------------------------------------------------------------------------------------------------------- *
596 * @brief advanceCarryInCarryOut
597 ** ------------------------------------------------------------------------------------------------------------- */
598Value * CarryManager::advanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Advance * const advance, Value * const value) {
599    const auto shiftAmount = advance->getAmount();
600    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
601        Value * const carryIn = getNextCarryIn(iBuilder);
602        Value * carryOut, * result;
603        std::tie(carryOut, result) = iBuilder->bitblock_advance(value, carryIn, shiftAmount);
604        setNextCarryOut(iBuilder, carryOut);
605        assert (result->getType() == iBuilder->getBitBlockType());
606        return result;
607    } else {
608        return longAdvanceCarryInCarryOut(iBuilder, value, shiftAmount);
609    }
610}
611
612Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const IndexedAdvance * const advance, Value * const value, Value * const index_strm) {
613    llvm::report_fatal_error("IndexedAdvance not yet supported.");
614}
615
616/** ------------------------------------------------------------------------------------------------------------- *
617 * @brief longAdvanceCarryInCarryOut
618 ** ------------------------------------------------------------------------------------------------------------- */
619inline Value * CarryManager::longAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const value, const unsigned shiftAmount) {
620
621    assert (mHasLongAdvance);
622    assert (shiftAmount >= LONG_ADVANCE_BREAKPOINT);
623
624    const auto blockWidth = iBuilder->getBitBlockWidth();
625    Type * const streamTy = iBuilder->getIntNTy(blockWidth);
626
627    Value * indices[3];
628
629    indices[0] = iBuilder->getInt32(0);
630
631    if (mIfDepth > 0) {
632        if (shiftAmount > blockWidth) {
633
634            Value * carry = iBuilder->CreateZExt(iBuilder->bitblock_any(value), streamTy);
635            const auto summaryBlocks = ceil_udiv(shiftAmount, blockWidth);
636            const auto summarySize = ceil_udiv(summaryBlocks, blockWidth);
637            VectorType * const bitBlockTy = iBuilder->getBitBlockType();
638            IntegerType * const laneTy = cast<IntegerType>(bitBlockTy->getVectorElementType());
639            const auto laneWidth = laneTy->getIntegerBitWidth();
640
641            assert (summarySize > 0);
642            assert (is_power_2(laneWidth));
643
644            indices[1] = iBuilder->getInt32(mCurrentFrameIndex++);
645
646            for (unsigned i = 1;;++i) {
647
648                assert (i <= summarySize);
649
650                indices[2] = iBuilder->getInt32(i - 1);
651
652                Value * const ptr = iBuilder->CreateGEP(mCurrentFrame, indices);
653                Value * const prior = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(ptr), streamTy);
654
655                Value * advanced = nullptr;
656                if (LLVM_LIKELY(summaryBlocks < laneWidth)) {
657                    advanced = iBuilder->CreateOr(iBuilder->CreateShl(prior, 1), carry);
658                    carry = iBuilder->CreateLShr(prior, summaryBlocks - 1);
659                } else {
660                    std::tie(advanced, carry) = iBuilder->bitblock_advance(prior, carry, 1);
661                }
662                Value * stream = iBuilder->CreateBitCast(advanced, bitBlockTy);
663                if (LLVM_LIKELY(i == summarySize)) {
664                    const auto n = bitBlockTy->getVectorNumElements();
665                    Constant * mask[n];                                       
666                    const auto m = udiv(summaryBlocks, laneWidth);
667                    if (m) {
668                        std::fill_n(mask, m, ConstantInt::getAllOnesValue(laneTy));
669                    }
670                    mask[m] = ConstantInt::get(laneTy, (1UL << (summaryBlocks & (laneWidth - 1))) - 1UL);
671                    if (n > m) {
672                        std::fill_n(mask + m + 1, n - m, UndefValue::get(laneTy));
673                    }
674                    stream = iBuilder->CreateAnd(stream, ConstantVector::get(ArrayRef<Constant *>(mask, n)));
675
676                    addToCarryOutSummary(iBuilder, stream);
677                    iBuilder->CreateBlockAlignedStore(stream, ptr);
678                    RecursivelyDeleteTriviallyDeadInstructions(carry);
679                    break;
680                }
681                addToCarryOutSummary(iBuilder, stream);
682                iBuilder->CreateBlockAlignedStore(stream, ptr);
683            }
684
685        } else if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
686            addToCarryOutSummary(iBuilder, value);
687        }
688    }
689
690    indices[1] = iBuilder->getInt32(mCurrentFrameIndex++);
691
692    // special case using a single buffer entry and the carry_out value.
693    if (LLVM_LIKELY((shiftAmount < blockWidth) && (mLoopDepth == 0))) {
694
695        indices[2] = indices[0]; // iBuilder->getInt32(0)
696        assert (cast<ConstantInt>(indices[2])->isNullValue());
697
698        Value * const buffer = iBuilder->CreateGEP(mCurrentFrame, indices);
699        assert (buffer->getType()->getPointerElementType() == iBuilder->getBitBlockType());
700        Value * carryIn = iBuilder->CreateBlockAlignedLoad(buffer);
701
702        iBuilder->CreateBlockAlignedStore(value, buffer);
703        /* Very special case - no combine */
704        if (LLVM_UNLIKELY(shiftAmount == blockWidth)) {
705            return iBuilder->CreateBitCast(carryIn, iBuilder->getBitBlockType());
706        }
707        Value* block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamTy), blockWidth - shiftAmount);
708        Value* block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(value, streamTy), shiftAmount);
709        return iBuilder->CreateBitCast(iBuilder->CreateOr(block1_shl, block0_shr), iBuilder->getBitBlockType());
710    } else { //
711        const unsigned blockShift = shiftAmount & (blockWidth - 1);
712        const unsigned summaryBlocks = ceil_udiv(shiftAmount, blockWidth);
713
714        // Create a mask to implement circular buffer indexing
715        Value * indexMask = iBuilder->getSize(nearest_pow2(summaryBlocks) - 1);
716        Value * blockIndex = iBuilder->getScalarField("CarryBlockIndex");
717
718        Value * carryIndex0 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(summaryBlocks));
719        indices[2] = iBuilder->CreateAnd(carryIndex0, indexMask);
720        Value * const carryInPtr = iBuilder->CreateGEP(mCurrentFrame, indices);
721        Value * carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
722
723        indices[2] = iBuilder->CreateAnd(blockIndex, indexMask);
724        Value * const carryOutPtr = iBuilder->CreateGEP(mCurrentFrame, indices);
725        assert (carryIn->getType() == iBuilder->getBitBlockType());
726
727        // If the long advance is an exact multiple of BitBlockWidth, we simply return the oldest
728        // block in the long advance carry data area.
729        if (LLVM_UNLIKELY(blockShift == 0)) {
730            iBuilder->CreateBlockAlignedStore(value, carryOutPtr);
731            return carryIn;
732        } else { // Otherwise we need to combine data from the two oldest blocks.
733            Value * const carryIndex1 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(summaryBlocks - 1));
734            indices[2] = iBuilder->CreateAnd(carryIndex1, indexMask);
735
736            Value * const carryInPtr2 = iBuilder->CreateGEP(mCurrentFrame, indices);
737            Value * const carryIn2 = iBuilder->CreateBlockAlignedLoad(carryInPtr2);
738
739            iBuilder->CreateBlockAlignedStore(value, carryOutPtr);
740
741            Value * const b0 = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamTy), blockWidth - blockShift);
742            Value * const b1 = iBuilder->CreateShl(iBuilder->CreateBitCast(carryIn2, streamTy), blockShift);
743            return iBuilder->CreateBitCast(iBuilder->CreateOr(b1, b0), iBuilder->getBitBlockType());
744        }
745    }
746}
747
748/** ------------------------------------------------------------------------------------------------------------- *
749 * @brief getNextCarryIn
750 ** ------------------------------------------------------------------------------------------------------------- */
751Value * CarryManager::getNextCarryIn(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
752    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
753    if (mLoopDepth == 0) {
754        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
755    } else {
756        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mLoopSelector});
757    }
758    Type * const carryTy = iBuilder->getBitBlockType();
759    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
760    Value * const carryIn = iBuilder->CreateBlockAlignedLoad(mCarryPackPtr);
761    if (mLoopDepth > 0) {
762        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(carryTy), mCarryPackPtr);
763    }
764    return carryIn;
765}
766
767/** ------------------------------------------------------------------------------------------------------------- *
768 * @brief setNextCarryOut
769 ** ------------------------------------------------------------------------------------------------------------- */
770void CarryManager::setNextCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * carryOut) {
771    Type * const carryTy = iBuilder->getBitBlockType();
772    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
773    carryOut = iBuilder->CreateBitCast(carryOut, carryTy);
774    if (mCarryInfo->hasSummary()) {
775        addToCarryOutSummary(iBuilder, carryOut);
776    }
777    if (mLoopDepth != 0) {
778        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mNextLoopSelector});
779        if (LLVM_LIKELY(!mCarryInfo->nonCarryCollapsingMode())) {
780            Value * accum = iBuilder->CreateBlockAlignedLoad(mCarryPackPtr);
781            carryOut = iBuilder->CreateOr(carryOut, accum);
782        }
783    }
784    ++mCurrentFrameIndex;
785    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
786    iBuilder->CreateBlockAlignedStore(carryOut, mCarryPackPtr);
787}
788
789/** ------------------------------------------------------------------------------------------------------------- *
790 * @brief readCarryInSummary
791 ** ------------------------------------------------------------------------------------------------------------- */
792Value * CarryManager::readCarryInSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, ConstantInt * index) const {
793    assert (mCarryInfo->hasSummary());
794    unsigned count = 2;
795    if (LLVM_UNLIKELY(mCarryInfo->hasBorrowedSummary())) {
796        Type * frameTy = mCurrentFrame->getType()->getPointerElementType();
797        count = 1;
798        while (frameTy->isStructTy()) {
799            ++count;
800            assert (frameTy->getStructNumElements() > 0);
801            frameTy = frameTy->getStructElementType(0);
802        }
803    }
804    const unsigned length = (mLoopDepth == 0) ? count : (count + 1);
805    Value * indicies[length];
806    std::fill(indicies, indicies + count - 1, iBuilder->getInt32(0));
807    indicies[count - 1] = index;
808    if (mLoopDepth != 0) {
809        indicies[count] = mLoopSelector;
810    }
811
812    ArrayRef<Value *> ar(indicies, length);
813    Value * const ptr = iBuilder->CreateGEP(mCurrentFrame, ar);
814    Value * const summary = iBuilder->CreateBlockAlignedLoad(ptr);
815    if (mLoopDepth != 0 && mCarryInfo->hasExplicitSummary()) {
816        Type * const carryTy = iBuilder->getBitBlockType();
817        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(carryTy), ptr);
818    }
819    return summary;
820}
821
822/** ------------------------------------------------------------------------------------------------------------- *
823 * @brief writeCarryOutSummary
824 ** ------------------------------------------------------------------------------------------------------------- */
825inline void CarryManager::writeCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const summary, ConstantInt * index) const {
826    Value * ptr = nullptr;
827    assert (mCarryInfo->hasExplicitSummary());
828    if (mLoopDepth > 0) {
829        ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index, mNextLoopSelector});
830    } else {
831        ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index});
832    }
833    iBuilder->CreateBlockAlignedStore(summary, ptr);
834}
835
836/** ------------------------------------------------------------------------------------------------------------- *
837 * @brief addToCarryOutSummary
838 ** ------------------------------------------------------------------------------------------------------------- */
839inline void CarryManager::addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const value) {
840    assert ("cannot add null summary value!" && value);   
841    assert ("summary stack is empty!" && !mCarrySummaryStack.empty());
842    assert (mCarryInfo->hasSummary());
843    mCarrySummaryStack.back() = iBuilder->CreateOr(value, mCarrySummaryStack.back());
844}
845
846/** ------------------------------------------------------------------------------------------------------------- *
847 * @brief enumerate
848 ** ------------------------------------------------------------------------------------------------------------- */
849unsigned CarryManager::getScopeCount(const PabloBlock * const scope, unsigned index) {
850    for (const Statement * stmt : *scope) {
851        if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
852            index = getScopeCount(cast<Branch>(stmt)->getBody(), index);
853        }
854    }
855    return index + 1;
856}
857
858/** ------------------------------------------------------------------------------------------------------------- *
859 * @brief hasIterationSpecificAssignment
860 ** ------------------------------------------------------------------------------------------------------------- */
861bool CarryManager::hasIterationSpecificAssignment(const PabloBlock * const scope) {
862#if 0
863    return dyn_cast_or_null<While>(scope->getBranch()) != nullptr;
864#else
865    if (const While * const br = dyn_cast_or_null<While>(scope->getBranch())) {
866        for (const Var * var : br->getEscaped()) {
867            for (const PabloAST * user : var->users()) {
868                if (const Extract * e = dyn_cast<Extract>(user)) {
869                    if (LLVM_UNLIKELY(e->getIndex() == var)) {
870                        // If we assign this Var a value and read the value as the index parameter
871                        // of a nested Extract statement, then we cannot collapse the carries.
872                        const PabloBlock * parent = e->getParent();
873                        for (;;) {
874                            if (parent == scope) {
875                                return true;
876                            }
877                            parent = parent->getPredecessor();
878                            if (parent == nullptr) {
879                                break;
880                            }
881                        }
882                    }
883                }
884            }
885        }
886    }
887    return false;
888#endif
889}
890
891static bool hasNonEmptyCarryStruct(const Type * const frameTy) {
892    if (frameTy->isStructTy()) {
893        for (unsigned i = 0; i < frameTy->getStructNumElements(); ++i) {
894            if (hasNonEmptyCarryStruct(frameTy->getStructElementType(i))) {
895                return true;
896            }
897        }
898        return false;
899    }
900    return true;
901}
902
903static bool hasNonEmptyCarryStruct(const std::vector<Type *> & state) {
904    for (const Type * const frameTy : state) {
905        if (hasNonEmptyCarryStruct(frameTy)) {
906            return true;
907        }
908    }
909    return false;
910}
911
912/** ------------------------------------------------------------------------------------------------------------- *
913 * @brief analyse
914 ** ------------------------------------------------------------------------------------------------------------- */
915StructType * CarryManager::analyse(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope,
916                                   const unsigned ifDepth, const unsigned loopDepth, const bool isNestedWithinNonCarryCollapsingLoop) {
917    assert ("scope cannot be null!" && scope);
918    assert ("entry scope (and only the entry scope) must be in scope 0"
919            && (mCarryScopes == 0 ? (scope == mKernel->getEntryBlock()) : (scope != mKernel->getEntryBlock())));
920    assert (mCarryScopes < mCarryMetadata.size());
921    Type * const carryTy = iBuilder->getBitBlockType();
922    Type * const blockTy = iBuilder->getBitBlockType();
923
924    const unsigned carryScopeIndex = mCarryScopes++;
925    const bool nonCarryCollapsingMode = hasIterationSpecificAssignment(scope);
926    Type * const carryPackType = (loopDepth == 0) ? carryTy : ArrayType::get(carryTy, 2);
927    std::vector<Type *> state;
928    for (const Statement * stmt : *scope) {
929        if (LLVM_UNLIKELY(isa<Advance>(stmt))) {
930            const auto amount = cast<Advance>(stmt)->getAmount();
931            Type * type = carryPackType;
932            if (LLVM_UNLIKELY(amount >= LONG_ADVANCE_BREAKPOINT)) {
933                const auto blockWidth = iBuilder->getBitBlockWidth();
934                const auto blocks = ceil_udiv(amount, blockWidth);
935                type = ArrayType::get(blockTy, nearest_pow2(blocks + ((loopDepth != 0) ? 1 : 0)));
936                if (LLVM_UNLIKELY(ifDepth > 0 && blocks != 1)) {
937                    const auto summarySize = ceil_udiv(blocks, blockWidth);
938                    // 1 bit will mark the presense of any bit in each block.
939                    state.push_back(ArrayType::get(blockTy, summarySize));
940                }
941                mHasLongAdvance = true;               
942            }
943            state.push_back(type);
944        } else if (LLVM_UNLIKELY(isa<IndexedAdvance>(stmt))) {
945            // The carry data for the indexed advance stores N bits of carry data,
946            // organized in packs that can be processed with GR instructions (such as PEXT, PDEP, popcount).
947            // A circular buffer is used.  Because the number of bits to be dequeued
948            // and enqueued is variable (based on the popcount of the index), an extra
949            // pack stores the offset position in the circular buffer.
950            const auto amount = cast<IndexedAdvance>(stmt)->getAmount();
951            const auto packWidth = sizeof(size_t) * 8;
952            const auto packs = ceil_udiv(amount, packWidth);
953            state.push_back(ArrayType::get(iBuilder->getSizeTy(), nearest_pow2(packs) + 1));
954        } else if (LLVM_UNLIKELY(isNonAdvanceCarryGeneratingStatement(stmt))) {
955            state.push_back(carryPackType);
956        } else if (LLVM_UNLIKELY(isa<If>(stmt))) {
957            state.push_back(analyse(iBuilder, cast<If>(stmt)->getBody(), ifDepth + 1, loopDepth, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
958        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
959            mHasLoop = true;
960            state.push_back(analyse(iBuilder, cast<While>(stmt)->getBody(), ifDepth, loopDepth + 1, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
961        }
962    }
963    // Build the carry state struct and add the summary pack if needed.
964    CarryData & cd = mCarryMetadata[carryScopeIndex];
965    StructType * carryState = nullptr;
966    CarryData::SummaryType summaryType = CarryData::NoSummary;
967    if (LLVM_UNLIKELY(state.empty())) {
968        carryState = StructType::get(iBuilder->getContext());
969    } else {
970        // do we have a summary or a sequence of nested empty structs?
971        if (hasNonEmptyCarryStruct(state)) {
972            if (dyn_cast_or_null<If>(scope->getBranch()) || nonCarryCollapsingMode || isNestedWithinNonCarryCollapsingLoop) {
973                if (LLVM_LIKELY(state.size() > 1)) {
974                    summaryType = CarryData::ExplicitSummary;
975                    // NOTE: summaries are stored differently depending whether we're entering an If or While branch. With an If branch, they
976                    // preceed the carry state data and with a While loop they succeed it. This is to help cache prefectching performance.
977                    state.insert(isa<If>(scope->getBranch()) ? state.begin() : state.end(), carryPackType);
978                } else {
979                    summaryType = CarryData::ImplicitSummary;
980                    if (hasNonEmptyCarryStruct(state[0])) {
981                        summaryType = CarryData::BorrowedSummary;
982                    }
983                }
984            }
985        }
986        carryState = StructType::get(iBuilder->getContext(), state);
987        // If we're in a loop and cannot use collapsing carry mode, convert the carry state struct into a capacity,
988        // carry state pointer, and summary pointer struct.
989        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
990            mHasNonCarryCollapsingLoops = true;
991            carryState = StructType::get(iBuilder->getSizeTy(), carryState->getPointerTo(), carryTy->getPointerTo(), nullptr);
992            assert (isDynamicallyAllocatedType(carryState));
993        }
994        cd.setNonCollapsingCarryMode(nonCarryCollapsingMode);
995    }
996    cd.setSummaryType(summaryType);
997    return carryState;
998}
999
1000/** ------------------------------------------------------------------------------------------------------------- *
1001 * @brief constructor
1002 ** ------------------------------------------------------------------------------------------------------------- */
1003CarryManager::CarryManager() noexcept
1004: mKernel(nullptr)
1005, mCurrentFrame(nullptr)
1006, mCurrentFrameIndex(0)
1007, mCurrentScope(nullptr)
1008, mCarryInfo(nullptr)
1009, mNextSummaryTest(nullptr)
1010, mIfDepth(0)
1011, mHasLongAdvance(false)
1012, mHasNonCarryCollapsingLoops(false)
1013, mHasLoop(false)
1014, mLoopDepth(0)
1015, mLoopSelector(nullptr)
1016, mNextLoopSelector(nullptr)
1017, mCarryPackPtr(nullptr)
1018, mCarryScopes(0) {
1019
1020}
1021
1022}
Note: See TracBrowser for help on using the repository browser.