source: icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp @ 5710

Last change on this file since 5710 was 5710, checked in by cameron, 2 years ago

Enabling Unicode log2 bounded repetition uwing indexed advance (for n<64 and AVX2 only)

File size: 52.5 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "carry_manager.h"
8#include <pablo/carry_data.h>
9#include <pablo/codegenstate.h>
10#include <llvm/IR/BasicBlock.h>
11#include <llvm/IR/DerivedTypes.h>
12#include <llvm/Transforms/Utils/Local.h>
13#include <pablo/branch.h>
14#include <pablo/pe_advance.h>
15#include <pablo/pe_scanthru.h>
16#include <pablo/pe_matchstar.h>
17#include <pablo/pe_var.h>
18#include <kernels/kernel_builder.h>
19#include <toolchain/toolchain.h>
20#include <array>
21
22using namespace llvm;
23
24namespace pablo {
25
26inline static bool is_power_2(const unsigned n) {
27    return (n && ((n & (n - 1)) == 0));
28}
29
30inline static unsigned ceil_log2(const unsigned v) {
31    assert ("log2(0) is undefined!" && v != 0);
32    return (sizeof(unsigned) * CHAR_BIT) - __builtin_clz(v - 1U);
33}
34
35inline static unsigned floor_log2(const unsigned v) {
36    assert ("log2(0) is undefined!" && v != 0);
37    return ((sizeof(unsigned) * CHAR_BIT) - 1U) - __builtin_clz(v);
38}
39
40inline static unsigned nearest_pow2(const unsigned v) {
41    assert (v > 0 && v < (UINT32_MAX / 2));
42    return (v < 2) ? 1 : (1U << ceil_log2(v));
43}
44
45inline static unsigned nearest_multiple(const unsigned n, const unsigned m) {
46    assert (is_power_2(m));
47    const unsigned r = (n + m - 1U) & -m;
48    assert (r >= n);
49    return r;
50}
51
52inline static bool is_multiple_of(const unsigned n, const unsigned m) {
53    return nearest_multiple(n, m) == n;
54}
55
56inline static unsigned udiv(const unsigned x, const unsigned y) {
57    assert (is_power_2(y));
58    const unsigned z = x >> floor_log2(y);
59    assert (z == (x / y));
60    return z;
61}
62
63inline static unsigned ceil_udiv(const unsigned x, const unsigned y) {
64    return (((x - 1) | (y - 1)) + 1) / y;
65}
66
67using TypeId = PabloAST::ClassTypeId;
68
69inline static bool isNonAdvanceCarryGeneratingStatement(const Statement * const stmt) {
70    return isa<CarryProducingStatement>(stmt) && !isa<Advance>(stmt) && !isa<IndexedAdvance>(stmt);
71}
72
73#define LONG_ADVANCE_BREAKPOINT 64
74
75/** ------------------------------------------------------------------------------------------------------------- *
76 * @brief initializeCarryData
77 ** ------------------------------------------------------------------------------------------------------------- */
78void CarryManager::initializeCarryData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, PabloKernel * const kernel) {
79
80    // Each scope constructs its own CarryData struct, which will be added to the final "carries" struct
81    // that is added to the Kernel. The scope index will indicate which struct to access.
82
83    // A CarryData struct either contains an array of CarryPackBlocks or an integer indicating the capacity of
84    // the variable length CarryData struct and pointer. A variable length CarryData struct is required whenever
85    // the streams accessed by a loop could vary between iterations. When resizing a CarryData struct for a
86    // particular loop, the current loop struct and all nested structs need to be resized. This accommodates
87    // the fact every pablo While loop must be executed at least once.
88
89    // A nested loop may also contain a variable length CarryData struct
90
91    // To determine whether we require a variable length CarryData struct, we test the escaped variables of
92    // each loop branch to see whether they are used as the index parameter of a nested Extract statement.
93    // Any scope that requires variable length CarryData, requires that all nested branches have a unique
94    // set of carries for that iteration.
95
96    assert (mKernel == nullptr);
97    mCurrentScope = kernel->getEntryBlock();
98    mKernel = kernel;
99
100    mCarryScopes = 0;
101
102    mCarryMetadata.resize(getScopeCount(mCurrentScope));
103
104    Type * const carryStateTy = analyse(iBuilder, mCurrentScope);
105
106    kernel->addScalar(carryStateTy, "carries");
107
108    if (mHasLoop) {
109        kernel->addScalar(iBuilder->getInt32Ty(), "selector");
110    }
111    if (mHasLongAdvance) {
112        kernel->addScalar(iBuilder->getSizeTy(), "CarryBlockIndex");
113    }
114    for (unsigned i = 0; i < mIndexedLongAdvanceTotal; i++) {
115        kernel->addScalar(iBuilder->getSizeTy(), "LongAdvancePosition" + std::to_string(i));
116    }
117}
118
119bool isDynamicallyAllocatedType(const Type * const ty) {
120    if (isa<StructType>(ty) && ty->getStructNumElements() == 3) {
121        return (ty->getStructElementType(1)->isPointerTy() && ty->getStructElementType(2)->isPointerTy() && ty->getStructElementType(0)->isIntegerTy());
122    }
123    return false;
124}
125
126bool containsDynamicallyAllocatedType(const Type * const ty) {
127    if (isa<StructType>(ty)) {
128        for (unsigned i = 0; i < ty->getStructNumElements(); ++i) {
129            if (isDynamicallyAllocatedType(ty->getStructElementType(i))) {
130                return true;
131            }
132        }
133    }
134    return false;
135}
136
137void freeDynamicallyAllocatedMemory(const std::unique_ptr<kernel::KernelBuilder> & idb, Value * const frame) {
138    StructType * const ty = cast<StructType>(frame->getType()->getPointerElementType());
139    std::array<Value *, 3> indices;
140    indices[0] = idb->getInt32(0);
141    for (unsigned i = 0; i < ty->getStructNumElements(); ++i) {
142        if (isDynamicallyAllocatedType(ty->getStructElementType(i))) {
143            indices[1] = idb->getInt32(i);
144            indices[2] = idb->getInt32(1);
145            Value * const innerFrame = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
146            if (containsDynamicallyAllocatedType(innerFrame->getType())) {
147                indices[2] = indices[0];
148                Value *  const count = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
149                BasicBlock * const entry = idb->GetInsertBlock();
150                BasicBlock * const cond = idb->CreateBasicBlock("freeCarryDataCond");
151                BasicBlock * const body = idb->CreateBasicBlock("freeCarryDataLoop");
152                BasicBlock * const exit = idb->CreateBasicBlock("freeCarryDataExit");
153                idb->CreateBr(cond);
154                idb->SetInsertPoint(cond);
155                PHINode * const index = idb->CreatePHI(count->getType(), 2);
156                index->addIncoming(ConstantInt::getNullValue(count->getType()), entry);
157                Value * test = idb->CreateICmpNE(index, count);
158                idb->CreateCondBr(test, body, exit);
159                idb->SetInsertPoint(body);
160                freeDynamicallyAllocatedMemory(idb, idb->CreateGEP(innerFrame, index));
161                index->addIncoming(idb->CreateAdd(index, ConstantInt::get(count->getType(), 1)), body);
162                idb->CreateBr(cond);
163                idb->SetInsertPoint(exit);
164            }
165            idb->CreateFree(innerFrame);
166            indices[2] = idb->getInt32(2);
167            Value *  const summary = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
168            idb->CreateFree(summary);
169        }
170    }
171}
172
173/** ------------------------------------------------------------------------------------------------------------- *
174 * @brief releaseCarryData
175 ** ------------------------------------------------------------------------------------------------------------- */
176void CarryManager::releaseCarryData(const std::unique_ptr<kernel::KernelBuilder> & idb) {
177    if (mHasNonCarryCollapsingLoops) {
178        freeDynamicallyAllocatedMemory(idb, idb->getScalarFieldPtr("carries"));
179    }
180}
181
182/** ------------------------------------------------------------------------------------------------------------- *
183 * @brief initializeCodeGen
184 ** ------------------------------------------------------------------------------------------------------------- */
185void CarryManager::initializeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
186
187    assert(!mCarryMetadata.empty());
188    mCarryInfo = &mCarryMetadata[0];
189    assert (!mCarryInfo->hasSummary());
190
191    mCurrentFrame = iBuilder->getScalarFieldPtr("carries");
192    mCurrentFrameIndex = 0;
193    mCarryScopes = 0;
194    mCarryScopeIndex.push_back(0);
195
196    assert (mCarryFrameStack.empty());
197
198    assert (mCarrySummaryStack.empty());
199
200    Type * const carryTy = iBuilder->getBitBlockType();
201
202    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
203
204    if (mHasLoop) {       
205        mLoopSelector = iBuilder->getScalarField("selector");
206        mNextLoopSelector = iBuilder->CreateXor(mLoopSelector, ConstantInt::get(mLoopSelector->getType(), 1));
207    }
208
209}
210
211/** ------------------------------------------------------------------------------------------------------------- *
212 * @brief finalizeCodeGen
213 ** ------------------------------------------------------------------------------------------------------------- */
214void CarryManager::finalizeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
215    if (mHasLoop) {
216        iBuilder->setScalarField("selector", mNextLoopSelector);
217    }
218    if (mHasLongAdvance) {
219        Value * idx = iBuilder->getScalarField("CarryBlockIndex");
220        idx = iBuilder->CreateAdd(idx, iBuilder->getSize(1));
221        iBuilder->setScalarField("CarryBlockIndex", idx);
222    }
223    assert (mCarryFrameStack.empty());   
224    assert ("base summary value was deleted!" && mCarrySummaryStack.size() == 1);
225    assert ("base summary value was overwritten with non-zero value!" && isa<Constant>(mCarrySummaryStack[0]) && cast<Constant>(mCarrySummaryStack[0])->isNullValue());
226    mCarrySummaryStack.clear();
227    assert (mCarryScopeIndex.size() == 1);
228    mCarryScopeIndex.clear();
229}
230
231/** ------------------------------------------------------------------------------------------------------------- *
232 * @brief enterLoopScope
233 ** ------------------------------------------------------------------------------------------------------------- */
234void CarryManager::enterLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
235    assert (scope);
236    assert (mHasLoop);
237    ++mLoopDepth;
238    enterScope(iBuilder, scope);
239}
240
241/** ------------------------------------------------------------------------------------------------------------- *
242 * @brief enterLoopBody
243 ** ------------------------------------------------------------------------------------------------------------- */
244void CarryManager::enterLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const entryBlock) {
245    if (mCarryInfo->hasSummary()) {
246        Type * const carryTy = iBuilder->getBitBlockType();
247        PHINode * phiCarryOutSummary = iBuilder->CreatePHI(carryTy, 2, "summary");
248        assert (!mCarrySummaryStack.empty());
249        phiCarryOutSummary->addIncoming(mCarrySummaryStack.back(), entryBlock);
250        // Replace the incoming carry summary with the phi node and add the phi node to the stack  so that we can
251        // properly OR it into the outgoing summary value.
252        // NOTE: this may change the base summary value; when exiting to the base scope, replace this summary with
253        // a null value to prevent subsequent nested scopes from inheriting the summary of this scope.
254        mCarrySummaryStack.back() = phiCarryOutSummary;
255        mCarrySummaryStack.push_back(phiCarryOutSummary);
256    }
257    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
258
259        assert (mCarryInfo->hasSummary());
260
261        Type * const int8PtrTy = iBuilder->getInt8PtrTy();
262        Type * const carryTy = iBuilder->getBitBlockType();
263        PointerType * const carryPtrTy = carryTy->getPointerTo();
264
265        // Check whether we need to resize the carry state
266        PHINode * index = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
267        mLoopIndicies.push_back(index);
268        index->addIncoming(iBuilder->getSize(0), entryBlock);
269        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
270        Value * capacity = iBuilder->CreateLoad(capacityPtr, "capacity");
271        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
272        Value * arrayPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
273        Value * array = iBuilder->CreateLoad(arrayPtr, "array");
274        BasicBlock * const entry = iBuilder->GetInsertBlock();
275        BasicBlock * const resizeCarryState = iBuilder->CreateBasicBlock("ResizeCarryState");
276        BasicBlock * const reallocExisting = iBuilder->CreateBasicBlock("ReallocExisting");
277        BasicBlock * const createNew = iBuilder->CreateBasicBlock("CreateNew");
278        BasicBlock * const resumeKernel = iBuilder->CreateBasicBlock("ResumeKernel");
279        iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(index, capacity), resumeKernel, resizeCarryState);
280
281        // RESIZE CARRY BLOCK
282        iBuilder->SetInsertPoint(resizeCarryState);
283        const auto BlockWidth = iBuilder->getBitBlockWidth() / 8;
284        const auto Log2BlockWidth = floor_log2(BlockWidth);
285        Constant * const carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(array->getType()->getPointerElementType()), iBuilder->getSizeTy(), false);
286        Value * const summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
287        Value * const hasCarryState = iBuilder->CreateICmpNE(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
288        iBuilder->CreateLikelyCondBr(hasCarryState, reallocExisting, createNew);
289
290        // REALLOCATE EXISTING
291        iBuilder->SetInsertPoint(reallocExisting);
292        Value * const capacitySize = iBuilder->CreateMul(capacity, carryStateWidth);
293        Value * const newCapacitySize = iBuilder->CreateShl(capacitySize, 1); // x 2
294        Value * const newArray = iBuilder->CreateCacheAlignedMalloc(newCapacitySize);
295        iBuilder->CreateMemCpy(newArray, array, capacitySize, iBuilder->getCacheAlignment());
296        iBuilder->CreateFree(array);
297        iBuilder->CreateStore(newArray, arrayPtr);
298        Value * const startNewArrayPtr = iBuilder->CreateGEP(iBuilder->CreatePointerCast(newArray, int8PtrTy), capacitySize);
299        iBuilder->CreateMemZero(startNewArrayPtr, capacitySize, BlockWidth);
300        Value * const newCapacity = iBuilder->CreateShl(capacity, 1);
301        iBuilder->CreateStore(newCapacity, capacityPtr);
302        Value * const summary = iBuilder->CreateLoad(summaryPtr, false);
303        Value * const summarySize = iBuilder->CreateShl(iBuilder->CreateAdd(iBuilder->CreateCeilLog2(capacity), ONE), Log2BlockWidth + 1);
304        Constant * const additionalSpace = iBuilder->getSize(2 * BlockWidth);
305        Value * const newSummarySize = iBuilder->CreateAdd(summarySize, additionalSpace);
306        Value * const newSummary = iBuilder->CreateBlockAlignedMalloc(newSummarySize);
307        iBuilder->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
308        iBuilder->CreateFree(summary);
309        iBuilder->CreateStore(iBuilder->CreatePointerCast(newSummary, carryPtrTy), summaryPtr);
310        Value * const startNewSummaryPtr = iBuilder->CreateGEP(iBuilder->CreatePointerCast(newSummary, int8PtrTy), summarySize);
311        iBuilder->CreateMemZero(startNewSummaryPtr, additionalSpace, BlockWidth);
312        iBuilder->CreateBr(resumeKernel);
313
314        // CREATE NEW
315        iBuilder->SetInsertPoint(createNew);
316        Constant * const initialLog2Capacity = iBuilder->getInt64(4);
317        Constant * const initialCapacity = ConstantExpr::getShl(ONE, initialLog2Capacity);
318        iBuilder->CreateStore(initialCapacity, capacityPtr);
319        Constant * const initialCapacitySize = ConstantExpr::getMul(initialCapacity, carryStateWidth);
320        Value * initialArray = iBuilder->CreateCacheAlignedMalloc(initialCapacitySize);
321        iBuilder->CreateMemZero(initialArray, initialCapacitySize, BlockWidth);
322        initialArray = iBuilder->CreatePointerCast(initialArray, array->getType());
323        iBuilder->CreateStore(initialArray, arrayPtr);
324        Constant * initialSummarySize = ConstantExpr::getShl(ConstantExpr::getAdd(initialLog2Capacity, iBuilder->getInt64(1)), iBuilder->getInt64(Log2BlockWidth + 1));
325        Value * initialSummary = iBuilder->CreateBlockAlignedMalloc(initialSummarySize);
326        iBuilder->CreateMemZero(initialSummary, initialSummarySize, BlockWidth);
327        initialSummary = iBuilder->CreatePointerCast(initialSummary, carryPtrTy);
328        iBuilder->CreateStore(initialSummary, summaryPtr);
329        iBuilder->CreateBr(resumeKernel);
330
331        // RESUME KERNEL
332        iBuilder->SetInsertPoint(resumeKernel);
333        PHINode * phiArrayPtr = iBuilder->CreatePHI(array->getType(), 3);
334        phiArrayPtr->addIncoming(array, entry);
335        phiArrayPtr->addIncoming(initialArray, createNew);
336        phiArrayPtr->addIncoming(newArray, reallocExisting);
337
338        // NOTE: the 3 here is only to pass the assertion later. It refers to the number of elements in the carry data struct.
339        mCarryFrameStack.emplace_back(mCurrentFrame, 3);
340        mCurrentFrame = iBuilder->CreateGEP(phiArrayPtr, index);
341    }
342}
343
344/** ------------------------------------------------------------------------------------------------------------- *
345 * @brief leaveLoopBody
346 ** ------------------------------------------------------------------------------------------------------------- */
347void CarryManager::leaveLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * /* exitBlock */) {
348
349    Type * const carryTy = iBuilder->getBitBlockType();
350
351    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
352
353        assert (mCarryInfo->hasSummary());
354
355        ConstantInt * const summaryIndex = iBuilder->getInt32(mCarryInfo->hasExplicitSummary() ? mCurrentFrameIndex : (mCurrentFrameIndex - 1));
356
357        Value * const carryInAccumulator = readCarryInSummary(iBuilder, summaryIndex);
358        Value * const carryOutAccumulator = mCarrySummaryStack.back();
359
360        if (mCarryInfo->hasExplicitSummary()) {
361            writeCarryOutSummary(iBuilder, carryOutAccumulator, summaryIndex);
362        }
363
364        std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
365        mCarryFrameStack.pop_back();
366
367        // In non-carry-collapsing mode, we cannot rely on the fact that performing a single iteration of this
368        // loop will consume all of the incoming carries from the prior block. We need to subtract the carries
369        // consumed by this iteration from our carry summary state. To do so in parallel, we use the the half-
370        // subtractor circuit to do it in ceil log2 steps. Similarly, we compute our carry out summary state
371        // (for the subsequent block to subtract) using a half-adder circuit.
372
373        // NOTE: this requires that, for all loop iterations, i, and all block iterations, j, the carry in
374        // summary, CI_i,j, matches the carry out summary of the prior block iteration, CO_i,j - 1.
375        // Otherwise we will end up with an incorrect result or being trapped in an infinite loop.
376
377        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
378        Value * capacity = iBuilder->CreateLoad(capacityPtr, false);
379        Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
380        Value * summary = iBuilder->CreateLoad(summaryPtr, false);
381
382        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
383
384        Value * loopSelector = iBuilder->CreateZExt(mLoopSelector, capacity->getType());
385
386        BasicBlock * entry = iBuilder->GetInsertBlock();
387        BasicBlock * update = iBuilder->CreateBasicBlock("UpdateNonCarryCollapsingSummary");
388        BasicBlock * resume = iBuilder->CreateBasicBlock("ResumeAfterUpdatingNonCarryCollapsingSummary");
389
390        iBuilder->CreateBr(update);
391
392        iBuilder->SetInsertPoint(update);
393        PHINode * i = iBuilder->CreatePHI(capacity->getType(), 2);
394        i->addIncoming(ConstantInt::getNullValue(capacity->getType()), entry);
395        PHINode * const borrow = iBuilder->CreatePHI(carryInAccumulator->getType(), 2);
396        borrow->addIncoming(carryInAccumulator, entry);
397        PHINode * const carry = iBuilder->CreatePHI(carryOutAccumulator->getType(), 2);
398        carry->addIncoming(carryOutAccumulator, entry);
399        // OR the updated carry in summary later for the summaryTest
400        PHINode * const carryInSummary = iBuilder->CreatePHI(carryTy, 2);
401        carryInSummary->addIncoming(Constant::getNullValue(carryTy), entry);
402
403        // half subtractor
404        Value * const carryInOffset = iBuilder->CreateOr(iBuilder->CreateShl(i, 1), loopSelector);
405        Value * const carryInPtr = iBuilder->CreateGEP(summary, carryInOffset);
406        Value * const carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
407        Value * const nextCarryIn = iBuilder->CreateXor(carryIn, borrow);
408        Value * const nextSummary = iBuilder->CreateOr(carryInSummary, nextCarryIn);
409
410        iBuilder->CreateBlockAlignedStore(nextCarryIn, carryInPtr);
411        carryInSummary->addIncoming(nextSummary, update);
412        Value * finalBorrow = iBuilder->CreateAnd(iBuilder->CreateNot(carryIn), borrow);
413        borrow->addIncoming(finalBorrow, update);
414
415        // half adder
416        Value * const carryOutOffset = iBuilder->CreateXor(carryInOffset, ConstantInt::get(carryInOffset->getType(), 1));
417        Value * const carryOutPtr = iBuilder->CreateGEP(summary, carryOutOffset);
418        Value * const carryOut = iBuilder->CreateBlockAlignedLoad(carryOutPtr);
419        Value * const nextCarryOut = iBuilder->CreateXor(carryOut, carry);
420
421        iBuilder->CreateBlockAlignedStore(nextCarryOut, carryOutPtr);
422        Value * finalCarry = iBuilder->CreateAnd(carryOut, carry);
423        carry->addIncoming(finalCarry, update);
424
425        // loop condition
426        i->addIncoming(iBuilder->CreateAdd(i, ONE), update);
427        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(iBuilder->CreateShl(ONE, i), capacity), update, resume);
428
429        iBuilder->SetInsertPoint(resume);
430
431        if (codegen::EnableAsserts) {
432            iBuilder->CreateAssertZero(iBuilder->CreateOr(finalBorrow, finalCarry),
433                                       "CarryManager: loop post-condition violated: final borrow and carry must be zero!");
434        }
435
436        assert (!mLoopIndicies.empty());
437        PHINode * index = mLoopIndicies.back();
438        index->addIncoming(iBuilder->CreateAdd(index, iBuilder->getSize(1)), resume);
439        mLoopIndicies.pop_back();
440
441        mNextSummaryTest = nextSummary;
442    }
443    if (mCarryInfo->hasSummary()) {
444        const auto n = mCarrySummaryStack.size(); assert (n > 1);
445        Value * carryOut = mCarrySummaryStack.back();
446        mCarrySummaryStack.pop_back();
447        PHINode * phiCarryOut = cast<PHINode>(mCarrySummaryStack.back());
448        phiCarryOut->addIncoming(carryOut, iBuilder->GetInsertBlock());
449        // If we're returning to the base scope, reset our accumulated summary value.
450        if (n == 2) {
451            carryOut = Constant::getNullValue(carryTy);
452        }
453        mCarrySummaryStack.back() = carryOut;
454    }
455}
456
457/** ------------------------------------------------------------------------------------------------------------- *
458 * @brief leaveLoopScope
459 ** ------------------------------------------------------------------------------------------------------------- */
460void CarryManager::leaveLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
461    assert (mLoopDepth > 0);
462    --mLoopDepth;
463    leaveScope(iBuilder);
464}
465
466/** ------------------------------------------------------------------------------------------------------------- *
467 * @brief enterIfScope
468 ** ------------------------------------------------------------------------------------------------------------- */
469void CarryManager::enterIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
470    ++mIfDepth;
471    enterScope(iBuilder, scope);
472    // We zero-initialized the nested summary value and later OR in the current summary into the escaping summary
473    // so that upon processing the subsequent block iteration, we branch into this If scope iff a carry out was
474    // generated by a statement within this If scope and not by a dominating statement in the outer scope.
475    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
476        assert (mCurrentFrameIndex == 0);
477        mNextSummaryTest = readCarryInSummary(iBuilder, iBuilder->getInt32(0));
478        if (mCarryInfo->hasExplicitSummary()) {
479            mCurrentFrameIndex = 1;
480        }
481    }
482    Type * const carryTy = iBuilder->getBitBlockType();
483    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
484}
485
486/** ------------------------------------------------------------------------------------------------------------- *
487 * @brief generateSummaryTest
488 ** ------------------------------------------------------------------------------------------------------------- */
489Value * CarryManager::generateSummaryTest(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * condition) {
490    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
491        assert ("summary test was not generated" && mNextSummaryTest);
492        condition = iBuilder->simd_or(condition, mNextSummaryTest);
493        mNextSummaryTest = nullptr;
494    }
495    assert ("summary test was not consumed" && (mNextSummaryTest == nullptr));
496    return condition;
497}
498
499/** ------------------------------------------------------------------------------------------------------------- *
500 * @brief enterIfBody
501 ** ------------------------------------------------------------------------------------------------------------- */
502void CarryManager::enterIfBody(const std::unique_ptr<kernel::KernelBuilder> & /* iBuilder */, BasicBlock * const entryBlock) {
503    assert (entryBlock);
504}
505
506/** ------------------------------------------------------------------------------------------------------------- *
507 * @brief leaveIfBody
508 ** ------------------------------------------------------------------------------------------------------------- */
509void CarryManager::leaveIfBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const exitBlock) {
510    assert (exitBlock);
511    const auto n = mCarrySummaryStack.size();
512    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
513        writeCarryOutSummary(iBuilder, mCarrySummaryStack[n - 1], iBuilder->getInt32(0));
514    }
515    if (n > 2) {
516        mCarrySummaryStack[n - 1] = iBuilder->CreateOr(mCarrySummaryStack[n - 1], mCarrySummaryStack[n - 2], "summary");
517    }
518}
519
520/** ------------------------------------------------------------------------------------------------------------- *
521 * @brief leaveIfScope
522 ** ------------------------------------------------------------------------------------------------------------- */
523void CarryManager::leaveIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
524    assert (mIfDepth > 0);
525    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
526        const auto n = mCarrySummaryStack.size(); assert (n > 0);
527        if (n > 2) {
528            // When leaving a nested If scope with a summary value, phi out the summary to ensure the
529            // appropriate summary is stored in the outer scope.
530            Value * nested = mCarrySummaryStack[n - 1];
531            Value * outer = mCarrySummaryStack[n - 2];
532            assert (nested->getType() == outer->getType());
533            PHINode * const phi = iBuilder->CreatePHI(nested->getType(), 2, "summary");
534            phi->addIncoming(outer, entryBlock);
535            phi->addIncoming(nested, exitBlock);
536            mCarrySummaryStack[n - 2] = phi;
537        }
538    }
539    --mIfDepth;
540    leaveScope(iBuilder);
541    mCarrySummaryStack.pop_back();
542}
543
544/** ------------------------------------------------------------------------------------------------------------ *
545 * @brief enterScope
546 ** ------------------------------------------------------------------------------------------------------------- */
547void CarryManager::enterScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
548    assert (scope);
549    // Store the state of the current frame and update the scope state
550    mCarryFrameStack.emplace_back(mCurrentFrame, mCurrentFrameIndex + 1);
551    mCurrentScope = scope;
552    mCarryScopeIndex.push_back(++mCarryScopes);
553    mCarryInfo = &mCarryMetadata[mCarryScopes];
554    // Check whether we're still within our struct bounds; if this fails, either the Pablo program changed during
555    // compilation or a memory corruption has occured.
556    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
557    mCurrentFrame = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
558    // Verify we're pointing to a carry frame struct
559    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
560    mCurrentFrameIndex = 0;
561}
562
563/** ------------------------------------------------------------------------------------------------------------- *
564 * @brief leaveScope
565 ** ------------------------------------------------------------------------------------------------------------- */
566void CarryManager::leaveScope(const std::unique_ptr<kernel::KernelBuilder> & /* iBuilder */) {
567
568    // Did we use all of the packs in this carry struct?
569    assert (mCurrentFrameIndex == mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
570    // Sanity test: are there remaining carry frames?
571    assert (!mCarryFrameStack.empty());
572
573    std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
574
575    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
576
577    mCarryFrameStack.pop_back();
578    mCarryScopeIndex.pop_back();
579    assert (!mCarryScopeIndex.empty());
580    mCurrentScope = mCurrentScope->getPredecessor();
581    assert (mCurrentScope);
582    mCarryInfo = &mCarryMetadata[mCarryScopeIndex.back()];
583}
584
585/** ------------------------------------------------------------------------------------------------------------- *
586 * @brief addCarryInCarryOut
587 ** ------------------------------------------------------------------------------------------------------------- */
588Value * CarryManager::addCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Statement * const operation, Value * const e1, Value * const e2) {
589    assert (operation && (isNonAdvanceCarryGeneratingStatement(operation)));
590    Value * const carryIn = getNextCarryIn(iBuilder);
591    Value * carryOut, * result;
592    std::tie(carryOut, result) = iBuilder->bitblock_add_with_carry(e1, e2, carryIn);
593    setNextCarryOut(iBuilder, carryOut);
594    assert (result->getType() == iBuilder->getBitBlockType());
595    return result;
596}
597
598/** ------------------------------------------------------------------------------------------------------------- *
599 * @brief advanceCarryInCarryOut
600 ** ------------------------------------------------------------------------------------------------------------- */
601Value * CarryManager::advanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Advance * const advance, Value * const value) {
602    const auto shiftAmount = advance->getAmount();
603    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
604        Value * const carryIn = getNextCarryIn(iBuilder);
605        Value * carryOut, * result;
606        std::tie(carryOut, result) = iBuilder->bitblock_advance(value, carryIn, shiftAmount);
607        setNextCarryOut(iBuilder, carryOut);
608        assert (result->getType() == iBuilder->getBitBlockType());
609        return result;
610    } else {
611        return longAdvanceCarryInCarryOut(iBuilder, value, shiftAmount);
612    }
613}
614
615Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
616    const auto shiftAmount = advance->getAmount();
617    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
618        Value * const carryIn = getNextCarryIn(b);
619        unsigned bitWidth = sizeof(size_t) * 8;
620        Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
621        Value * PEXT_f = nullptr;
622        Value * PDEP_f = nullptr;
623        if (bitWidth == 64) {
624            PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
625            PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
626        }
627        else if ((bitWidth == 32)  && (shiftAmount < 32)) {
628            PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
629            PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
630        }
631        else {
632            llvm::report_fatal_error("indexed_advance unsupported bit width");
633        }
634        Value * shiftVal = b->getSize(shiftAmount);
635        Value * carry = b->mvmd_extract(bitWidth, carryIn, 0);
636        Value * result = b->allZeroes();
637        for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
638            Value * s = b->mvmd_extract(bitWidth, strm, i);
639            Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
640            Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
641            Value * bits = b->CreateCall(PEXT_f, {s, ix});
642            Value * adv = b->CreateOr(b->CreateShl(bits, shiftAmount), carry);
643            Value * popcount_small = b->CreateICmpULT(ix_popcnt, shiftVal);
644            Value * carry_if_popcount_small = 
645                b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
646                            b->CreateLShr(carry, ix_popcnt));
647            Value * carry_if_popcount_large = b->CreateLShr(bits, b->CreateSub(ix_popcnt, shiftVal));
648            carry = b->CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
649            result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {adv, ix}), i);
650        }
651        Value * carryOut = b->mvmd_insert(bitWidth, b->allZeroes(), carry, 0);
652        setNextCarryOut(b, carryOut);
653        return result;
654    } else {
655        llvm::report_fatal_error("IndexedAdvance > LONG_ADVANCE_BREAKPOINT not yet supported.");
656    }
657}
658
659
660/** ------------------------------------------------------------------------------------------------------------- *
661 * @brief longAdvanceCarryInCarryOut
662 ** ------------------------------------------------------------------------------------------------------------- */
663inline Value * CarryManager::longAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const value, const unsigned shiftAmount) {
664
665    assert (mHasLongAdvance);
666    assert (shiftAmount >= LONG_ADVANCE_BREAKPOINT);
667    assert (value);
668
669    const auto blockWidth = iBuilder->getBitBlockWidth();
670    Type * const streamTy = iBuilder->getIntNTy(blockWidth);
671
672    Value * indices[3];
673
674    indices[0] = iBuilder->getInt32(0);
675
676    if (mIfDepth > 0) {
677        if (shiftAmount > blockWidth) {
678
679            // TODO: once CEILING(shiftAmount / 256) > 2, consider using a half-adder/subtractor strategy?
680
681            Value * carry = iBuilder->CreateZExt(iBuilder->bitblock_any(value), streamTy);
682            const auto summaryBlocks = ceil_udiv(shiftAmount, blockWidth);
683            const auto summarySize = ceil_udiv(summaryBlocks, blockWidth);
684            VectorType * const bitBlockTy = iBuilder->getBitBlockType();
685            IntegerType * const laneTy = cast<IntegerType>(bitBlockTy->getVectorElementType());
686            const auto laneWidth = laneTy->getIntegerBitWidth();
687
688            assert (summarySize > 0);
689            assert (is_power_2(laneWidth));
690
691            indices[1] = iBuilder->getInt32(mCurrentFrameIndex++);
692
693            for (unsigned i = 1;;++i) {
694
695                assert (i <= summarySize);
696
697                indices[2] = iBuilder->getInt32(i - 1);
698
699                Value * const ptr = iBuilder->CreateGEP(mCurrentFrame, indices);
700                Value * const prior = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(ptr), streamTy);
701
702                Value * advanced = nullptr;
703                if (LLVM_LIKELY(summaryBlocks < laneWidth)) {
704                    advanced = iBuilder->CreateOr(iBuilder->CreateShl(prior, 1), carry);
705                    carry = iBuilder->CreateLShr(prior, summaryBlocks - 1);
706                } else {
707                    std::tie(advanced, carry) = iBuilder->bitblock_advance(prior, carry, 1);
708                }
709                Value * stream = iBuilder->CreateBitCast(advanced, bitBlockTy);
710                if (LLVM_LIKELY(i == summarySize)) {
711                    const auto n = bitBlockTy->getVectorNumElements();
712                    Constant * mask[n];                                       
713                    const auto m = udiv(summaryBlocks, laneWidth);
714                    if (m) {
715                        std::fill_n(mask, m, ConstantInt::getAllOnesValue(laneTy));
716                    }
717                    mask[m] = ConstantInt::get(laneTy, (1UL << (summaryBlocks & (laneWidth - 1))) - 1UL);
718                    if (n > m) {
719                        std::fill_n(mask + m + 1, n - m, UndefValue::get(laneTy));
720                    }
721                    stream = iBuilder->CreateAnd(stream, ConstantVector::get(ArrayRef<Constant *>(mask, n)));
722                    addToCarryOutSummary(iBuilder, stream);
723                    iBuilder->CreateBlockAlignedStore(stream, ptr);
724                    break;
725                }
726                addToCarryOutSummary(iBuilder, stream);
727                iBuilder->CreateBlockAlignedStore(stream, ptr);
728            }
729
730        } else if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
731            addToCarryOutSummary(iBuilder, value);
732        }
733    }
734
735    indices[1] = iBuilder->getInt32(mCurrentFrameIndex++);
736
737    // special case using a single buffer entry and the carry_out value.
738    if (LLVM_LIKELY((shiftAmount < blockWidth) && (mLoopDepth == 0))) {
739
740        indices[2] = indices[0]; // iBuilder->getInt32(0)
741        assert (cast<ConstantInt>(indices[2])->isNullValue());
742
743        Value * const buffer = iBuilder->CreateGEP(mCurrentFrame, indices);
744        assert (buffer->getType()->getPointerElementType() == iBuilder->getBitBlockType());
745        Value * carryIn = iBuilder->CreateBlockAlignedLoad(buffer);
746
747        iBuilder->CreateBlockAlignedStore(value, buffer);
748        /* Very special case - no combine */
749        if (LLVM_UNLIKELY(shiftAmount == blockWidth)) {
750            return iBuilder->CreateBitCast(carryIn, iBuilder->getBitBlockType());
751        }
752        Value* block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamTy), blockWidth - shiftAmount);
753        Value* block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(value, streamTy), shiftAmount);
754        return iBuilder->CreateBitCast(iBuilder->CreateOr(block1_shl, block0_shr), iBuilder->getBitBlockType());
755    } else { //
756        const unsigned blockShift = shiftAmount & (blockWidth - 1);
757        const unsigned summaryBlocks = ceil_udiv(shiftAmount, blockWidth);
758
759        // Create a mask to implement circular buffer indexing
760        Value * indexMask = iBuilder->getSize(nearest_pow2(summaryBlocks) - 1);
761        Value * blockIndex = iBuilder->getScalarField("CarryBlockIndex");
762
763        Value * carryIndex0 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(summaryBlocks));
764        indices[2] = iBuilder->CreateAnd(carryIndex0, indexMask);
765        Value * const carryInPtr = iBuilder->CreateGEP(mCurrentFrame, indices);
766        Value * carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
767
768        indices[2] = iBuilder->CreateAnd(blockIndex, indexMask);
769        Value * const carryOutPtr = iBuilder->CreateGEP(mCurrentFrame, indices);
770        assert (carryIn->getType() == iBuilder->getBitBlockType());
771
772        // If the long advance is an exact multiple of BitBlockWidth, we simply return the oldest
773        // block in the long advance carry data area.
774        if (LLVM_UNLIKELY(blockShift == 0)) {
775            iBuilder->CreateBlockAlignedStore(value, carryOutPtr);
776            return carryIn;
777        } else { // Otherwise we need to combine data from the two oldest blocks.
778            Value * const carryIndex1 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(summaryBlocks - 1));
779            indices[2] = iBuilder->CreateAnd(carryIndex1, indexMask);
780
781            Value * const carryInPtr2 = iBuilder->CreateGEP(mCurrentFrame, indices);
782            Value * const carryIn2 = iBuilder->CreateBlockAlignedLoad(carryInPtr2);
783            assert (carryOutPtr->getType()->getPointerElementType() == value->getType());
784            iBuilder->CreateBlockAlignedStore(value, carryOutPtr);
785
786            Value * const b0 = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamTy), blockWidth - blockShift);
787            Value * const b1 = iBuilder->CreateShl(iBuilder->CreateBitCast(carryIn2, streamTy), blockShift);
788            return iBuilder->CreateBitCast(iBuilder->CreateOr(b1, b0), iBuilder->getBitBlockType());
789        }
790    }
791}
792
793/** ------------------------------------------------------------------------------------------------------------- *
794 * @brief getNextCarryIn
795 ** ------------------------------------------------------------------------------------------------------------- */
796Value * CarryManager::getNextCarryIn(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
797    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
798    if (mLoopDepth == 0) {
799        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
800    } else {
801        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mLoopSelector});
802    }
803    Type * const carryTy = iBuilder->getBitBlockType();
804    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
805    Value * const carryIn = iBuilder->CreateBlockAlignedLoad(mCarryPackPtr);
806    if (mLoopDepth > 0) {
807        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(carryTy), mCarryPackPtr);
808    }
809    return carryIn;
810}
811
812/** ------------------------------------------------------------------------------------------------------------- *
813 * @brief setNextCarryOut
814 ** ------------------------------------------------------------------------------------------------------------- */
815void CarryManager::setNextCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * carryOut) {
816    Type * const carryTy = iBuilder->getBitBlockType();
817    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
818    carryOut = iBuilder->CreateBitCast(carryOut, carryTy);
819    if (mCarryInfo->hasSummary()) {
820        addToCarryOutSummary(iBuilder, carryOut);
821    }
822    if (mLoopDepth != 0) {
823        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mNextLoopSelector});
824        if (LLVM_LIKELY(!mCarryInfo->nonCarryCollapsingMode())) {
825            Value * accum = iBuilder->CreateBlockAlignedLoad(mCarryPackPtr);
826            carryOut = iBuilder->CreateOr(carryOut, accum);
827        }
828    }
829    ++mCurrentFrameIndex;
830    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
831    iBuilder->CreateBlockAlignedStore(carryOut, mCarryPackPtr);
832}
833
834/** ------------------------------------------------------------------------------------------------------------- *
835 * @brief readCarryInSummary
836 ** ------------------------------------------------------------------------------------------------------------- */
837Value * CarryManager::readCarryInSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, ConstantInt * index) const {
838    assert (mCarryInfo->hasSummary());
839    unsigned count = 2;
840    if (LLVM_UNLIKELY(mCarryInfo->hasBorrowedSummary())) {
841        Type * frameTy = mCurrentFrame->getType()->getPointerElementType();
842        count = 1;
843        while (frameTy->isStructTy()) {
844            ++count;
845            assert (frameTy->getStructNumElements() > 0);
846            frameTy = frameTy->getStructElementType(0);
847        }
848    }
849    const unsigned length = (mLoopDepth == 0) ? count : (count + 1);
850    Value * indicies[length];
851    std::fill(indicies, indicies + count - 1, iBuilder->getInt32(0));
852    indicies[count - 1] = index;
853    if (mLoopDepth != 0) {
854        indicies[count] = mLoopSelector;
855    }
856
857    ArrayRef<Value *> ar(indicies, length);
858    Value * const ptr = iBuilder->CreateGEP(mCurrentFrame, ar);
859    Value * const summary = iBuilder->CreateBlockAlignedLoad(ptr);
860    if (mLoopDepth != 0 && mCarryInfo->hasExplicitSummary()) {
861        Type * const carryTy = iBuilder->getBitBlockType();
862        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(carryTy), ptr);
863    }
864    return summary;
865}
866
867/** ------------------------------------------------------------------------------------------------------------- *
868 * @brief writeCarryOutSummary
869 ** ------------------------------------------------------------------------------------------------------------- */
870inline void CarryManager::writeCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const summary, ConstantInt * index) const {
871    Value * ptr = nullptr;
872    assert (mCarryInfo->hasExplicitSummary());
873    if (mLoopDepth > 0) {
874        ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index, mNextLoopSelector});
875    } else {
876        ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index});
877    }
878    iBuilder->CreateBlockAlignedStore(summary, ptr);
879}
880
881/** ------------------------------------------------------------------------------------------------------------- *
882 * @brief addToCarryOutSummary
883 ** ------------------------------------------------------------------------------------------------------------- */
884inline void CarryManager::addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const value) {
885    assert ("cannot add null summary value!" && value);   
886    assert ("summary stack is empty!" && !mCarrySummaryStack.empty());
887    assert (mCarryInfo->hasSummary());
888    mCarrySummaryStack.back() = iBuilder->CreateOr(value, mCarrySummaryStack.back());
889}
890
891/** ------------------------------------------------------------------------------------------------------------- *
892 * @brief enumerate
893 ** ------------------------------------------------------------------------------------------------------------- */
894unsigned CarryManager::getScopeCount(const PabloBlock * const scope, unsigned index) {
895    for (const Statement * stmt : *scope) {
896        if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
897            index = getScopeCount(cast<Branch>(stmt)->getBody(), index);
898        }
899    }
900    return index + 1;
901}
902
903/** ------------------------------------------------------------------------------------------------------------- *
904 * @brief hasIterationSpecificAssignment
905 ** ------------------------------------------------------------------------------------------------------------- */
906bool isNonRegularLanguage(const PabloBlock * const scope) {
907    if (const Branch * br = scope->getBranch()) {
908        return !br->isRegular();
909    }
910    return false;
911}
912
913static bool hasNonEmptyCarryStruct(const Type * const frameTy) {
914    if (frameTy->isStructTy()) {
915        for (unsigned i = 0; i < frameTy->getStructNumElements(); ++i) {
916            if (hasNonEmptyCarryStruct(frameTy->getStructElementType(i))) {
917                return true;
918            }
919        }
920        return false;
921    }
922    return true;
923}
924
925static bool hasNonEmptyCarryStruct(const std::vector<Type *> & state) {
926    for (const Type * const frameTy : state) {
927        if (hasNonEmptyCarryStruct(frameTy)) {
928            return true;
929        }
930    }
931    return false;
932}
933
934/** ------------------------------------------------------------------------------------------------------------- *
935 * @brief analyse
936 ** ------------------------------------------------------------------------------------------------------------- */
937StructType * CarryManager::analyse(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope,
938                                   const unsigned ifDepth, const unsigned loopDepth, const bool isNestedWithinNonCarryCollapsingLoop) {
939    assert ("scope cannot be null!" && scope);
940    assert ("entry scope (and only the entry scope) must be in scope 0"
941            && (mCarryScopes == 0 ? (scope == mKernel->getEntryBlock()) : (scope != mKernel->getEntryBlock())));
942    assert (mCarryScopes < mCarryMetadata.size());
943    Type * const carryTy = iBuilder->getBitBlockType();
944    Type * const blockTy = iBuilder->getBitBlockType();
945
946    const unsigned carryScopeIndex = mCarryScopes++;
947    const bool nonCarryCollapsingMode = isNonRegularLanguage(scope);
948    Type * const carryPackType = (loopDepth == 0) ? carryTy : ArrayType::get(carryTy, 2);
949    std::vector<Type *> state;
950    for (const Statement * stmt : *scope) {
951        if (LLVM_UNLIKELY(isa<Advance>(stmt) || isa<IndexedAdvance>(stmt))) {
952            int64_t amount;
953            if (isa<Advance>(stmt)) amount = cast<Advance>(stmt)->getAmount();
954            else amount = cast<IndexedAdvance>(stmt)->getAmount();
955            Type * type = carryPackType;
956            if (LLVM_UNLIKELY(amount >= LONG_ADVANCE_BREAKPOINT)) {
957                const auto blockWidth = iBuilder->getBitBlockWidth();
958                const auto blocks = ceil_udiv(amount, blockWidth);
959                type = ArrayType::get(blockTy, nearest_pow2(blocks + ((loopDepth != 0) ? 1 : 0)));
960                if (LLVM_UNLIKELY(ifDepth > 0 && blocks != 1)) {
961                    const auto summarySize = ceil_udiv(blocks, blockWidth);
962                    // 1 bit will mark the presense of any bit in each block.
963                    state.push_back(ArrayType::get(blockTy, summarySize));
964                }
965                mHasLongAdvance = true;
966                if (isa<IndexedAdvance>(stmt)) mIndexedLongAdvanceTotal++;
967            }
968            state.push_back(type);
969        } else if (LLVM_UNLIKELY(isNonAdvanceCarryGeneratingStatement(stmt))) {
970            state.push_back(carryPackType);
971        } else if (LLVM_UNLIKELY(isa<If>(stmt))) {
972            state.push_back(analyse(iBuilder, cast<If>(stmt)->getBody(), ifDepth + 1, loopDepth, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
973        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
974            mHasLoop = true;
975            state.push_back(analyse(iBuilder, cast<While>(stmt)->getBody(), ifDepth, loopDepth + 1, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
976        }
977    }
978    // Build the carry state struct and add the summary pack if needed.
979    CarryData & cd = mCarryMetadata[carryScopeIndex];
980    StructType * carryState = nullptr;
981    CarryData::SummaryType summaryType = CarryData::NoSummary;
982    if (LLVM_UNLIKELY(state.empty())) {
983        carryState = StructType::get(iBuilder->getContext());
984    } else {
985        // do we have a summary or a sequence of nested empty structs?
986        if (hasNonEmptyCarryStruct(state)) {
987            if (dyn_cast_or_null<If>(scope->getBranch()) || nonCarryCollapsingMode || isNestedWithinNonCarryCollapsingLoop) {
988                if (LLVM_LIKELY(state.size() > 1)) {
989                    summaryType = CarryData::ExplicitSummary;
990                    // NOTE: summaries are stored differently depending whether we're entering an If or While branch. With an If branch, they
991                    // preceed the carry state data and with a While loop they succeed it. This is to help cache prefectching performance.
992                    state.insert(isa<If>(scope->getBranch()) ? state.begin() : state.end(), carryPackType);
993                } else {
994                    summaryType = CarryData::ImplicitSummary;
995                    if (hasNonEmptyCarryStruct(state[0])) {
996                        summaryType = CarryData::BorrowedSummary;
997                    }
998                }
999            }
1000        }
1001        carryState = StructType::get(iBuilder->getContext(), state);
1002        // If we're in a loop and cannot use collapsing carry mode, convert the carry state struct into a capacity,
1003        // carry state pointer, and summary pointer struct.
1004        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
1005            mHasNonCarryCollapsingLoops = true;
1006            carryState = StructType::get(iBuilder->getSizeTy(), carryState->getPointerTo(), carryTy->getPointerTo(), nullptr);
1007            assert (isDynamicallyAllocatedType(carryState));
1008        }
1009        cd.setNonCollapsingCarryMode(nonCarryCollapsingMode);
1010    }
1011    cd.setSummaryType(summaryType);
1012    return carryState;
1013}
1014
1015/** ------------------------------------------------------------------------------------------------------------- *
1016 * @brief constructor
1017 ** ------------------------------------------------------------------------------------------------------------- */
1018CarryManager::CarryManager() noexcept
1019: mKernel(nullptr)
1020, mCurrentFrame(nullptr)
1021, mCurrentFrameIndex(0)
1022, mCurrentScope(nullptr)
1023, mCarryInfo(nullptr)
1024, mNextSummaryTest(nullptr)
1025, mIfDepth(0)
1026, mHasLongAdvance(false)
1027, mIndexedLongAdvanceTotal(0)
1028, mIndexedLongAdvanceIndex(0)
1029, mHasNonCarryCollapsingLoops(false)
1030, mHasLoop(false)
1031, mLoopDepth(0)
1032, mLoopSelector(nullptr)
1033, mNextLoopSelector(nullptr)
1034, mCarryPackPtr(nullptr)
1035, mCarryScopes(0) {
1036
1037}
1038
1039}
Note: See TracBrowser for help on using the repository browser.