source: icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

Last change on this file was 6275, checked in by nmedfort, 5 months ago

More work on optimizing for stateless kernels

File size: 53.8 KB
RevLine 
[4644]1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
[5267]7#include "carry_manager.h"
[4683]8#include <pablo/carry_data.h>
9#include <pablo/codegenstate.h>
[4726]10#include <llvm/IR/BasicBlock.h>
[5267]11#include <llvm/IR/DerivedTypes.h>
[5586]12#include <llvm/Transforms/Utils/Local.h>
[5267]13#include <pablo/branch.h>
14#include <pablo/pe_advance.h>
15#include <pablo/pe_scanthru.h>
16#include <pablo/pe_matchstar.h>
17#include <pablo/pe_var.h>
[5440]18#include <kernels/kernel_builder.h>
[5493]19#include <toolchain/toolchain.h>
20#include <array>
[5371]21
[5267]22using namespace llvm;
23
[4925]24namespace pablo {
[4715]25
[5586]26inline static bool is_power_2(const unsigned n) {
27    return (n && ((n & (n - 1)) == 0));
28}
29
[5371]30inline static unsigned ceil_log2(const unsigned v) {
31    assert ("log2(0) is undefined!" && v != 0);
[5586]32    return (sizeof(unsigned) * CHAR_BIT) - __builtin_clz(v - 1U);
[5371]33}
34
[5361]35inline static unsigned floor_log2(const unsigned v) {
36    assert ("log2(0) is undefined!" && v != 0);
[5586]37    return ((sizeof(unsigned) * CHAR_BIT) - 1U) - __builtin_clz(v);
[5361]38}
39
[5371]40inline static unsigned nearest_pow2(const unsigned v) {
[5586]41    assert (v > 0 && v < (UINT32_MAX / 2));
42    return (v < 2) ? 1 : (1U << ceil_log2(v));
[5227]43}
44
[5586]45inline static unsigned nearest_multiple(const unsigned n, const unsigned m) {
46    assert (is_power_2(m));
47    const unsigned r = (n + m - 1U) & -m;
48    assert (r >= n);
49    return r;
50}
51
52inline static bool is_multiple_of(const unsigned n, const unsigned m) {
53    return nearest_multiple(n, m) == n;
54}
55
56inline static unsigned udiv(const unsigned x, const unsigned y) {
57    assert (is_power_2(y));
58    const unsigned z = x >> floor_log2(y);
59    assert (z == (x / y));
60    return z;
61}
62
[5227]63inline static unsigned ceil_udiv(const unsigned x, const unsigned y) {
64    return (((x - 1) | (y - 1)) + 1) / y;
65}
66
[5329]67using TypeId = PabloAST::ClassTypeId;
68
69inline static bool isNonAdvanceCarryGeneratingStatement(const Statement * const stmt) {
[5705]70    return isa<CarryProducingStatement>(stmt) && !isa<Advance>(stmt) && !isa<IndexedAdvance>(stmt);
[5329]71}
72
[5371]73#define LONG_ADVANCE_BREAKPOINT 64
74
[4925]75/** ------------------------------------------------------------------------------------------------------------- *
[5063]76 * @brief initializeCarryData
[4925]77 ** ------------------------------------------------------------------------------------------------------------- */
[5712]78void CarryManager::initializeCarryData(const std::unique_ptr<kernel::KernelBuilder> & b, PabloKernel * const kernel) {
[5045]79
[5227]80    // Each scope constructs its own CarryData struct, which will be added to the final "carries" struct
81    // that is added to the Kernel. The scope index will indicate which struct to access.
[5045]82
[5227]83    // A CarryData struct either contains an array of CarryPackBlocks or an integer indicating the capacity of
84    // the variable length CarryData struct and pointer. A variable length CarryData struct is required whenever
85    // the streams accessed by a loop could vary between iterations. When resizing a CarryData struct for a
86    // particular loop, the current loop struct and all nested structs need to be resized. This accommodates
87    // the fact every pablo While loop must be executed at least once.
88
89    // A nested loop may also contain a variable length CarryData struct
90
91    // To determine whether we require a variable length CarryData struct, we test the escaped variables of
92    // each loop branch to see whether they are used as the index parameter of a nested Extract statement.
93    // Any scope that requires variable length CarryData, requires that all nested branches have a unique
94    // set of carries for that iteration.
95
[5435]96    assert (mKernel == nullptr);
[5836]97    mCurrentScope = kernel->getEntryScope();
[5435]98    mKernel = kernel;
[5227]99
[5340]100    mCarryScopes = 0;
[5227]101
[5435]102    mCarryMetadata.resize(getScopeCount(mCurrentScope));
[5340]103
[5712]104    Type * const carryStateTy = analyse(b, mCurrentScope);
[5307]105
[6184]106    kernel->addInternalScalar(carryStateTy, "carries");
[5366]107
[5307]108    if (mHasLoop) {
[6184]109        kernel->addInternalScalar(b->getInt32Ty(), "selector");
[5307]110    }
[5311]111    if (mHasLongAdvance) {
[6184]112        kernel->addInternalScalar(b->getSizeTy(), "CarryBlockIndex");
[5311]113    }
[5708]114    for (unsigned i = 0; i < mIndexedLongAdvanceTotal; i++) {
[6184]115        kernel->addInternalScalar(b->getSizeTy(), "IndexedAdvancePosition" + std::to_string(i));
[5708]116    }
[5493]117}
[5435]118
[5493]119bool isDynamicallyAllocatedType(const Type * const ty) {
120    if (isa<StructType>(ty) && ty->getStructNumElements() == 3) {
121        return (ty->getStructElementType(1)->isPointerTy() && ty->getStructElementType(2)->isPointerTy() && ty->getStructElementType(0)->isIntegerTy());
122    }
123    return false;
[4939]124}
125
[5493]126bool containsDynamicallyAllocatedType(const Type * const ty) {
127    if (isa<StructType>(ty)) {
128        for (unsigned i = 0; i < ty->getStructNumElements(); ++i) {
129            if (isDynamicallyAllocatedType(ty->getStructElementType(i))) {
130                return true;
131            }
132        }
133    }
134    return false;
135}
136
137void freeDynamicallyAllocatedMemory(const std::unique_ptr<kernel::KernelBuilder> & idb, Value * const frame) {
138    StructType * const ty = cast<StructType>(frame->getType()->getPointerElementType());
139    std::array<Value *, 3> indices;
140    indices[0] = idb->getInt32(0);
141    for (unsigned i = 0; i < ty->getStructNumElements(); ++i) {
142        if (isDynamicallyAllocatedType(ty->getStructElementType(i))) {
143            indices[1] = idb->getInt32(i);
144            indices[2] = idb->getInt32(1);
145            Value * const innerFrame = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
146            if (containsDynamicallyAllocatedType(innerFrame->getType())) {
147                indices[2] = indices[0];
148                Value *  const count = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
149                BasicBlock * const entry = idb->GetInsertBlock();
150                BasicBlock * const cond = idb->CreateBasicBlock("freeCarryDataCond");
151                BasicBlock * const body = idb->CreateBasicBlock("freeCarryDataLoop");
152                BasicBlock * const exit = idb->CreateBasicBlock("freeCarryDataExit");
153                idb->CreateBr(cond);
154                idb->SetInsertPoint(cond);
155                PHINode * const index = idb->CreatePHI(count->getType(), 2);
156                index->addIncoming(ConstantInt::getNullValue(count->getType()), entry);
157                Value * test = idb->CreateICmpNE(index, count);
158                idb->CreateCondBr(test, body, exit);
159                idb->SetInsertPoint(body);
160                freeDynamicallyAllocatedMemory(idb, idb->CreateGEP(innerFrame, index));
161                index->addIncoming(idb->CreateAdd(index, ConstantInt::get(count->getType(), 1)), body);
162                idb->CreateBr(cond);
163                idb->SetInsertPoint(exit);
164            }
165            idb->CreateFree(innerFrame);
166            indices[2] = idb->getInt32(2);
167            Value *  const summary = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
168            idb->CreateFree(summary);
169        }
170    }
171}
172
[4959]173/** ------------------------------------------------------------------------------------------------------------- *
[5486]174 * @brief releaseCarryData
175 ** ------------------------------------------------------------------------------------------------------------- */
[5493]176void CarryManager::releaseCarryData(const std::unique_ptr<kernel::KernelBuilder> & idb) {
177    if (mHasNonCarryCollapsingLoops) {
178        freeDynamicallyAllocatedMemory(idb, idb->getScalarFieldPtr("carries"));
179    }
[5486]180}
181
182/** ------------------------------------------------------------------------------------------------------------- *
[5828]183 * @brief clearCarryState
184 ** ------------------------------------------------------------------------------------------------------------- */
185void CarryManager::clearCarryData(const std::unique_ptr<kernel::KernelBuilder> & idb) {
186
187
188
189}
190
191/** ------------------------------------------------------------------------------------------------------------- *
[5063]192 * @brief initializeCodeGen
[4959]193 ** ------------------------------------------------------------------------------------------------------------- */
[5712]194void CarryManager::initializeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5227]195
[5292]196    assert(!mCarryMetadata.empty());
[5227]197    mCarryInfo = &mCarryMetadata[0];
198    assert (!mCarryInfo->hasSummary());
[6275]199    if (LLVM_LIKELY(mKernel->isStateful())) {
200        mCurrentFrame = b->getScalarFieldPtr("carries");
201    } else {
202        mCurrentFrame = nullptr;
203    }
[4691]204    mCurrentFrameIndex = 0;
[5340]205    mCarryScopes = 0;
206    mCarryScopeIndex.push_back(0);
[5361]207
[5366]208    assert (mCarryFrameStack.empty());
[5361]209
[5368]210    assert (mCarrySummaryStack.empty());
[5361]211
[5712]212    Type * const carryTy = b->getBitBlockType();
[5435]213
214    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
215
[6275]216    if (mHasLoop) {
[5712]217        mLoopSelector = b->getScalarField("selector");
218        mNextLoopSelector = b->CreateXor(mLoopSelector, ConstantInt::get(mLoopSelector->getType(), 1));
[5307]219    }
[5431]220
[4644]221}
222
[4959]223/** ------------------------------------------------------------------------------------------------------------- *
[5307]224 * @brief finalizeCodeGen
225 ** ------------------------------------------------------------------------------------------------------------- */
[5712]226void CarryManager::finalizeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5307]227    if (mHasLoop) {
[5712]228        b->setScalarField("selector", mNextLoopSelector);
[5307]229    }
[5311]230    if (mHasLongAdvance) {
[5712]231        Value * idx = b->getScalarField("CarryBlockIndex");
232        idx = b->CreateAdd(idx, b->getSize(1));
233        b->setScalarField("CarryBlockIndex", idx);
[5311]234    }
[6275]235    assert (mCarryFrameStack.empty());
[5371]236    assert ("base summary value was deleted!" && mCarrySummaryStack.size() == 1);
237    assert ("base summary value was overwritten with non-zero value!" && isa<Constant>(mCarrySummaryStack[0]) && cast<Constant>(mCarrySummaryStack[0])->isNullValue());
[5368]238    mCarrySummaryStack.clear();
[5340]239    assert (mCarryScopeIndex.size() == 1);
240    mCarryScopeIndex.clear();
[5307]241}
242
243/** ------------------------------------------------------------------------------------------------------------- *
[5227]244 * @brief enterLoopScope
[4925]245 ** ------------------------------------------------------------------------------------------------------------- */
[5712]246void CarryManager::enterLoopScope(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope) {
[5227]247    assert (scope);
[5366]248    assert (mHasLoop);
[5307]249    ++mLoopDepth;
[5712]250    enterScope(b, scope);
[4670]251}
252
[4925]253/** ------------------------------------------------------------------------------------------------------------- *
[5227]254 * @brief enterLoopBody
[4925]255 ** ------------------------------------------------------------------------------------------------------------- */
[5712]256void CarryManager::enterLoopBody(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const entryBlock) {
[5227]257    if (mCarryInfo->hasSummary()) {
[5712]258        Type * const carryTy = b->getBitBlockType();
259        PHINode * phiCarryOutSummary = b->CreatePHI(carryTy, 2, "summary");
[5368]260        assert (!mCarrySummaryStack.empty());
261        phiCarryOutSummary->addIncoming(mCarrySummaryStack.back(), entryBlock);
262        // Replace the incoming carry summary with the phi node and add the phi node to the stack  so that we can
263        // properly OR it into the outgoing summary value.
264        // NOTE: this may change the base summary value; when exiting to the base scope, replace this summary with
265        // a null value to prevent subsequent nested scopes from inheriting the summary of this scope.
266        mCarrySummaryStack.back() = phiCarryOutSummary;
267        mCarrySummaryStack.push_back(phiCarryOutSummary);
[5227]268    }
[5366]269    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
[4708]270
[5366]271        assert (mCarryInfo->hasSummary());
[5361]272
[5712]273        Type * const int8PtrTy = b->getInt8PtrTy();
274        Type * const carryTy = b->getBitBlockType();
[5435]275        PointerType * const carryPtrTy = carryTy->getPointerTo();
276
[5227]277        // Check whether we need to resize the carry state
[5712]278        PHINode * index = b->CreatePHI(b->getSizeTy(), 2);
[5227]279        mLoopIndicies.push_back(index);
[5712]280        index->addIncoming(b->getSize(0), entryBlock);
281        Value * capacityPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(0)});
282        Value * capacity = b->CreateLoad(capacityPtr, "capacity");
[5361]283        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
[5712]284        Value * arrayPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(1)});
285        Value * array = b->CreateLoad(arrayPtr, "array");
286        BasicBlock * const entry = b->GetInsertBlock();
287        BasicBlock * const resizeCarryState = b->CreateBasicBlock("ResizeCarryState");
288        BasicBlock * const reallocExisting = b->CreateBasicBlock("ReallocExisting");
289        BasicBlock * const createNew = b->CreateBasicBlock("CreateNew");
290        BasicBlock * const resumeKernel = b->CreateBasicBlock("ResumeKernel");
291        b->CreateLikelyCondBr(b->CreateICmpNE(index, capacity), resumeKernel, resizeCarryState);
[5227]292
[5361]293        // RESIZE CARRY BLOCK
[5712]294        b->SetInsertPoint(resizeCarryState);
295        const auto BlockWidth = b->getBitBlockWidth() / 8;
[5361]296        const auto Log2BlockWidth = floor_log2(BlockWidth);
[5712]297        Constant * const carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(array->getType()->getPointerElementType()), b->getSizeTy(), false);
298        Value * const summaryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(2)});
299        Value * const hasCarryState = b->CreateICmpNE(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
300        b->CreateLikelyCondBr(hasCarryState, reallocExisting, createNew);
[5245]301
[5361]302        // REALLOCATE EXISTING
[5712]303        b->SetInsertPoint(reallocExisting);
304        Value * const capacitySize = b->CreateMul(capacity, carryStateWidth);
305        Value * const newCapacitySize = b->CreateShl(capacitySize, 1); // x 2
306        Value * const newArray = b->CreateCacheAlignedMalloc(newCapacitySize);
307        b->CreateMemCpy(newArray, array, capacitySize, b->getCacheAlignment());
308        b->CreateFree(array);
309        b->CreateStore(newArray, arrayPtr);
310        Value * const startNewArrayPtr = b->CreateGEP(b->CreatePointerCast(newArray, int8PtrTy), capacitySize);
311        b->CreateMemZero(startNewArrayPtr, capacitySize, BlockWidth);
312        Value * const newCapacity = b->CreateShl(capacity, 1);
313        b->CreateStore(newCapacity, capacityPtr);
314        Value * const summary = b->CreateLoad(summaryPtr, false);
315        Value * const summarySize = b->CreateShl(b->CreateAdd(b->CreateCeilLog2(capacity), ONE), Log2BlockWidth + 1);
316        Constant * const additionalSpace = b->getSize(2 * BlockWidth);
317        Value * const newSummarySize = b->CreateAdd(summarySize, additionalSpace);
318        Value * const newSummary = b->CreateBlockAlignedMalloc(newSummarySize);
319        b->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
320        b->CreateFree(summary);
321        b->CreateStore(b->CreatePointerCast(newSummary, carryPtrTy), summaryPtr);
322        Value * const startNewSummaryPtr = b->CreateGEP(b->CreatePointerCast(newSummary, int8PtrTy), summarySize);
323        b->CreateMemZero(startNewSummaryPtr, additionalSpace, BlockWidth);
324        b->CreateBr(resumeKernel);
[5227]325
[5361]326        // CREATE NEW
[5712]327        b->SetInsertPoint(createNew);
328        Constant * const initialLog2Capacity = b->getInt64(4);
[5361]329        Constant * const initialCapacity = ConstantExpr::getShl(ONE, initialLog2Capacity);
[5712]330        b->CreateStore(initialCapacity, capacityPtr);
[5361]331        Constant * const initialCapacitySize = ConstantExpr::getMul(initialCapacity, carryStateWidth);
[5712]332        Value * initialArray = b->CreateCacheAlignedMalloc(initialCapacitySize);
333        b->CreateMemZero(initialArray, initialCapacitySize, BlockWidth);
334        initialArray = b->CreatePointerCast(initialArray, array->getType());
335        b->CreateStore(initialArray, arrayPtr);
336        Constant * initialSummarySize = ConstantExpr::getShl(ConstantExpr::getAdd(initialLog2Capacity, b->getInt64(1)), b->getInt64(Log2BlockWidth + 1));
337        Value * initialSummary = b->CreateBlockAlignedMalloc(initialSummarySize);
338        b->CreateMemZero(initialSummary, initialSummarySize, BlockWidth);
339        initialSummary = b->CreatePointerCast(initialSummary, carryPtrTy);
340        b->CreateStore(initialSummary, summaryPtr);
341        b->CreateBr(resumeKernel);
[5361]342
343        // RESUME KERNEL
[5712]344        b->SetInsertPoint(resumeKernel);
345        PHINode * phiArrayPtr = b->CreatePHI(array->getType(), 3);
[5361]346        phiArrayPtr->addIncoming(array, entry);
347        phiArrayPtr->addIncoming(initialArray, createNew);
348        phiArrayPtr->addIncoming(newArray, reallocExisting);
349
[5366]350        // NOTE: the 3 here is only to pass the assertion later. It refers to the number of elements in the carry data struct.
351        mCarryFrameStack.emplace_back(mCurrentFrame, 3);
[5712]352        mCurrentFrame = b->CreateGEP(phiArrayPtr, index);
[4647]353    }
[4644]354}
355
[4925]356/** ------------------------------------------------------------------------------------------------------------- *
[5227]357 * @brief leaveLoopBody
[4925]358 ** ------------------------------------------------------------------------------------------------------------- */
[5712]359void CarryManager::leaveLoopBody(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * /* exitBlock */) {
[5361]360
[5712]361    Type * const carryTy = b->getBitBlockType();
[5435]362
[5366]363    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
[5361]364
[5366]365        assert (mCarryInfo->hasSummary());
366
[5712]367        ConstantInt * const summaryIndex = b->getInt32(mCarryInfo->hasExplicitSummary() ? mCurrentFrameIndex : (mCurrentFrameIndex - 1));
[5366]368
[5712]369        Value * const carryInAccumulator = readCarryInSummary(b, summaryIndex);
[5368]370        Value * const carryOutAccumulator = mCarrySummaryStack.back();
[5361]371
[5366]372        if (mCarryInfo->hasExplicitSummary()) {
[5712]373            writeCarryOutSummary(b, carryOutAccumulator, summaryIndex);
[5366]374        }
375
376        std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
377        mCarryFrameStack.pop_back();
378
[5361]379        // In non-carry-collapsing mode, we cannot rely on the fact that performing a single iteration of this
380        // loop will consume all of the incoming carries from the prior block. We need to subtract the carries
381        // consumed by this iteration from our carry summary state. To do so in parallel, we use the the half-
382        // subtractor circuit to do it in ceil log2 steps. Similarly, we compute our carry out summary state
383        // (for the subsequent block to subtract) using a half-adder circuit.
384
385        // NOTE: this requires that, for all loop iterations, i, and all block iterations, j, the carry in
386        // summary, CI_i,j, matches the carry out summary of the prior block iteration, CO_i,j - 1.
[5493]387        // Otherwise we will end up with an incorrect result or being trapped in an infinite loop.
[5361]388
[5712]389        Value * capacityPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(0)});
390        Value * capacity = b->CreateLoad(capacityPtr, false);
391        Value * summaryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(2)});
392        Value * summary = b->CreateLoad(summaryPtr, false);
[5361]393
394        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
395
[5712]396        Value * loopSelector = b->CreateZExt(mLoopSelector, capacity->getType());
[5361]397
[5712]398        BasicBlock * entry = b->GetInsertBlock();
399        BasicBlock * update = b->CreateBasicBlock("UpdateNonCarryCollapsingSummary");
400        BasicBlock * resume = b->CreateBasicBlock("ResumeAfterUpdatingNonCarryCollapsingSummary");
[5361]401
[5712]402        b->CreateBr(update);
[5361]403
[5712]404        b->SetInsertPoint(update);
405        PHINode * i = b->CreatePHI(capacity->getType(), 2);
[5361]406        i->addIncoming(ConstantInt::getNullValue(capacity->getType()), entry);
[5712]407        PHINode * const borrow = b->CreatePHI(carryInAccumulator->getType(), 2);
[5361]408        borrow->addIncoming(carryInAccumulator, entry);
[5712]409        PHINode * const carry = b->CreatePHI(carryOutAccumulator->getType(), 2);
[5361]410        carry->addIncoming(carryOutAccumulator, entry);
411        // OR the updated carry in summary later for the summaryTest
[5712]412        PHINode * const carryInSummary = b->CreatePHI(carryTy, 2);
[5435]413        carryInSummary->addIncoming(Constant::getNullValue(carryTy), entry);
[5361]414
415        // half subtractor
[5712]416        Value * const carryInOffset = b->CreateOr(b->CreateShl(i, 1), loopSelector);
417        Value * const carryInPtr = b->CreateGEP(summary, carryInOffset);
418        Value * const carryIn = b->CreateBlockAlignedLoad(carryInPtr);
419        Value * const nextCarryIn = b->CreateXor(carryIn, borrow);
420        Value * const nextSummary = b->CreateOr(carryInSummary, nextCarryIn);
[5586]421
[5712]422        b->CreateBlockAlignedStore(nextCarryIn, carryInPtr);
[5368]423        carryInSummary->addIncoming(nextSummary, update);
[5712]424        Value * finalBorrow = b->CreateAnd(b->CreateNot(carryIn), borrow);
[5361]425        borrow->addIncoming(finalBorrow, update);
426
427        // half adder
[5712]428        Value * const carryOutOffset = b->CreateXor(carryInOffset, ConstantInt::get(carryInOffset->getType(), 1));
429        Value * const carryOutPtr = b->CreateGEP(summary, carryOutOffset);
430        Value * const carryOut = b->CreateBlockAlignedLoad(carryOutPtr);
431        Value * const nextCarryOut = b->CreateXor(carryOut, carry);
[5586]432
[5712]433        b->CreateBlockAlignedStore(nextCarryOut, carryOutPtr);
434        Value * finalCarry = b->CreateAnd(carryOut, carry);
[5361]435        carry->addIncoming(finalCarry, update);
436
437        // loop condition
[5712]438        i->addIncoming(b->CreateAdd(i, ONE), update);
439        b->CreateCondBr(b->CreateICmpNE(b->CreateShl(ONE, i), capacity), update, resume);
[5361]440
[5712]441        b->SetInsertPoint(resume);
[5361]442
[5771]443        b->CreateAssertZero(b->CreateOr(finalBorrow, finalCarry),
444                                   "CarryManager: loop post-condition violated: final borrow and carry must be zero!");
[5368]445
[5292]446        assert (!mLoopIndicies.empty());
[5227]447        PHINode * index = mLoopIndicies.back();
[5712]448        index->addIncoming(b->CreateAdd(index, b->getSize(1)), resume);
[5227]449        mLoopIndicies.pop_back();
[5366]450
[5368]451        mNextSummaryTest = nextSummary;
[4647]452    }
[5361]453    if (mCarryInfo->hasSummary()) {
[5368]454        const auto n = mCarrySummaryStack.size(); assert (n > 1);
455        Value * carryOut = mCarrySummaryStack.back();
456        mCarrySummaryStack.pop_back();
457        PHINode * phiCarryOut = cast<PHINode>(mCarrySummaryStack.back());
[5712]458        phiCarryOut->addIncoming(carryOut, b->GetInsertBlock());
[5368]459        // If we're returning to the base scope, reset our accumulated summary value.
460        if (n == 2) {
[5435]461            carryOut = Constant::getNullValue(carryTy);
[5368]462        }
463        mCarrySummaryStack.back() = carryOut;
[5361]464    }
[4644]465}
[4647]466
[4925]467/** ------------------------------------------------------------------------------------------------------------- *
[5227]468 * @brief leaveLoopScope
[4925]469 ** ------------------------------------------------------------------------------------------------------------- */
[5712]470void CarryManager::leaveLoopScope(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
[5227]471    assert (mLoopDepth > 0);
[5307]472    --mLoopDepth;
[5712]473    leaveScope(b);
[4644]474}
475
[4925]476/** ------------------------------------------------------------------------------------------------------------- *
[5227]477 * @brief enterIfScope
[4925]478 ** ------------------------------------------------------------------------------------------------------------- */
[5712]479void CarryManager::enterIfScope(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope) {
[5227]480    ++mIfDepth;
[5712]481    enterScope(b, scope);
[5368]482    // We zero-initialized the nested summary value and later OR in the current summary into the escaping summary
483    // so that upon processing the subsequent block iteration, we branch into this If scope iff a carry out was
484    // generated by a statement within this If scope and not by a dominating statement in the outer scope.
[5366]485    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
486        assert (mCurrentFrameIndex == 0);
[5712]487        mNextSummaryTest = readCarryInSummary(b, b->getInt32(0));
[5366]488        if (mCarryInfo->hasExplicitSummary()) {
489            mCurrentFrameIndex = 1;
490        }
491    }
[5712]492    Type * const carryTy = b->getBitBlockType();
[5435]493    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
[4644]494}
495
[4925]496/** ------------------------------------------------------------------------------------------------------------- *
[5227]497 * @brief generateSummaryTest
[4925]498 ** ------------------------------------------------------------------------------------------------------------- */
[5712]499Value * CarryManager::generateSummaryTest(const std::unique_ptr<kernel::KernelBuilder> & b, Value * condition) {
[5227]500    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
[5366]501        assert ("summary test was not generated" && mNextSummaryTest);
[5712]502        condition = b->simd_or(condition, mNextSummaryTest);
[5366]503        mNextSummaryTest = nullptr;
[4703]504    }
[5366]505    assert ("summary test was not consumed" && (mNextSummaryTest == nullptr));
[5227]506    return condition;
[4704]507}
[4925]508
509/** ------------------------------------------------------------------------------------------------------------- *
[5227]510 * @brief enterIfBody
[4925]511 ** ------------------------------------------------------------------------------------------------------------- */
[5712]512void CarryManager::enterIfBody(const std::unique_ptr<kernel::KernelBuilder> & /* b */, BasicBlock * const entryBlock) {
[5366]513    assert (entryBlock);
[4644]514}
[4922]515
[4925]516/** ------------------------------------------------------------------------------------------------------------- *
[5227]517 * @brief leaveIfBody
[4925]518 ** ------------------------------------------------------------------------------------------------------------- */
[5712]519void CarryManager::leaveIfBody(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const exitBlock) {
[5366]520    assert (exitBlock);
[5368]521    const auto n = mCarrySummaryStack.size();
522    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
[5712]523        writeCarryOutSummary(b, mCarrySummaryStack[n - 1], b->getInt32(0));
[4644]524    }
[5366]525    if (n > 2) {
[5712]526        mCarrySummaryStack[n - 1] = b->CreateOr(mCarrySummaryStack[n - 1], mCarrySummaryStack[n - 2], "summary");
[4644]527    }
528}
529
[4925]530/** ------------------------------------------------------------------------------------------------------------- *
[5227]531 * @brief leaveIfScope
[4925]532 ** ------------------------------------------------------------------------------------------------------------- */
[5712]533void CarryManager::leaveIfScope(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
[5227]534    assert (mIfDepth > 0);
[5366]535    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
[5368]536        const auto n = mCarrySummaryStack.size(); assert (n > 0);
[5366]537        if (n > 2) {
[5227]538            // When leaving a nested If scope with a summary value, phi out the summary to ensure the
539            // appropriate summary is stored in the outer scope.
[5368]540            Value * nested = mCarrySummaryStack[n - 1];
541            Value * outer = mCarrySummaryStack[n - 2];
542            assert (nested->getType() == outer->getType());
[5712]543            PHINode * const phi = b->CreatePHI(nested->getType(), 2, "summary");
[5368]544            phi->addIncoming(outer, entryBlock);
545            phi->addIncoming(nested, exitBlock);
546            mCarrySummaryStack[n - 2] = phi;
[5366]547        }
[4644]548    }
[5227]549    --mIfDepth;
[5712]550    leaveScope(b);
[5368]551    mCarrySummaryStack.pop_back();
[4644]552}
553
[5227]554/** ------------------------------------------------------------------------------------------------------------ *
555 * @brief enterScope
[4925]556 ** ------------------------------------------------------------------------------------------------------------- */
[5712]557void CarryManager::enterScope(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope) {
[5227]558    assert (scope);
559    // Store the state of the current frame and update the scope state
[5366]560    mCarryFrameStack.emplace_back(mCurrentFrame, mCurrentFrameIndex + 1);
[5227]561    mCurrentScope = scope;
[5340]562    mCarryScopeIndex.push_back(++mCarryScopes);
563    mCarryInfo = &mCarryMetadata[mCarryScopes];
[5353]564    // Check whether we're still within our struct bounds; if this fails, either the Pablo program changed during
[5227]565    // compilation or a memory corruption has occured.
566    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
[5712]567    mCurrentFrame = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex)});
[5227]568    // Verify we're pointing to a carry frame struct
569    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
[5366]570    mCurrentFrameIndex = 0;
[4644]571}
572
[4925]573/** ------------------------------------------------------------------------------------------------------------- *
[5227]574 * @brief leaveScope
[4925]575 ** ------------------------------------------------------------------------------------------------------------- */
[5712]576void CarryManager::leaveScope(const std::unique_ptr<kernel::KernelBuilder> & /* b */) {
[4925]577
[5227]578    // Did we use all of the packs in this carry struct?
579    assert (mCurrentFrameIndex == mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
580    // Sanity test: are there remaining carry frames?
[5366]581    assert (!mCarryFrameStack.empty());
[5227]582
[5366]583    std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
[5227]584
585    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
586
[5366]587    mCarryFrameStack.pop_back();
[5340]588    mCarryScopeIndex.pop_back();
589    assert (!mCarryScopeIndex.empty());
[5227]590    mCurrentScope = mCurrentScope->getPredecessor();
591    assert (mCurrentScope);
[5340]592    mCarryInfo = &mCarryMetadata[mCarryScopeIndex.back()];
[4644]593}
594
[4925]595/** ------------------------------------------------------------------------------------------------------------- *
[5227]596 * @brief addCarryInCarryOut
[4925]597 ** ------------------------------------------------------------------------------------------------------------- */
[5712]598Value * CarryManager::addCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const Statement * const operation, Value * const e1, Value * const e2) {
[5329]599    assert (operation && (isNonAdvanceCarryGeneratingStatement(operation)));
[5712]600    Value * const carryIn = getNextCarryIn(b);
[5227]601    Value * carryOut, * result;
[5712]602    std::tie(carryOut, result) = b->bitblock_add_with_carry(e1, e2, carryIn);
603    setNextCarryOut(b, carryOut);
604    assert (result->getType() == b->getBitBlockType());
[5227]605    return result;
[4925]606}
607
608/** ------------------------------------------------------------------------------------------------------------- *
[5227]609 * @brief advanceCarryInCarryOut
[4925]610 ** ------------------------------------------------------------------------------------------------------------- */
[5712]611Value * CarryManager::advanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const Advance * const advance, Value * const value) {
[5227]612    const auto shiftAmount = advance->getAmount();
[5371]613    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
[5712]614        Value * const carryIn = getNextCarryIn(b);
[5227]615        Value * carryOut, * result;
[5712]616        std::tie(carryOut, result) = b->bitblock_advance(value, carryIn, shiftAmount);
617        setNextCarryOut(b, carryOut);
618        assert (result->getType() == b->getBitBlockType());
[5227]619        return result;
620    } else {
[5712]621        return longAdvanceCarryInCarryOut(b, value, shiftAmount);
[4925]622    }
623}
624
[5712]625/** ------------------------------------------------------------------------------------------------------------- *
626 * @brief indexedAdvanceCarryInCarryOut
627 ** ------------------------------------------------------------------------------------------------------------- */
[5708]628Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
629    const auto shiftAmount = advance->getAmount();
[5713]630    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
[5708]631        Value * const carryIn = getNextCarryIn(b);
[5713]632        Value * carryOut, * result;
633        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
[5710]634        setNextCarryOut(b, carryOut);
[5708]635        return result;
[5711]636    } else if (shiftAmount <= b->getBitBlockWidth()) {
[5713]637        Value * carryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
638        Value * carryIn = b->CreateBlockAlignedLoad(carryPtr);
639        Value * carryOut, * result;
640        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
641        b->CreateBlockAlignedStore(carryOut, carryPtr);
[5712]642        if ((mIfDepth > 0) && mCarryInfo->hasExplicitSummary()) {
643            addToCarryOutSummary(b, strm);
644        }
[5711]645        return result;
[5708]646    } else {
[5715]647        unsigned summaryFrame = mCurrentFrameIndex;
648        if (mIfDepth > 0) {
649            // Skip over summary frame to perform the long indexed advance.
650            mCurrentFrameIndex++;
651        }
652        Type * iBitBlock = b->getIntNTy(b->getBitBlockWidth());
653        Constant * blockWidth = b->getSize(b->getBitBlockWidth());
654        Constant * blockWidth_1 = b->getSize(b->getBitBlockWidth() - 1);
655        Value * carryPosition = b->getScalarField("IndexedAdvancePosition" + std::to_string(mIndexedLongAdvanceIndex));
656        Value * carryBlockEndPos = b->CreateAdd(carryPosition, blockWidth_1);
[5716]657        unsigned carry_blocks = nearest_pow2(1+ceil_udiv(shiftAmount, b->getBitBlockWidth()));
[5715]658        Constant * carryQueueBlocks = b->getSize(carry_blocks);
659        Value * carryBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryPosition, blockWidth), carryQueueBlocks), b->getInt32Ty());
660        Value * carryEndBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryBlockEndPos, blockWidth), carryQueueBlocks), b->getInt32Ty());
661        Value * lo_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryBlock});
662        Value * hi_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryEndBlock});
663        Value * c_lo = b->CreateBitCast(b->CreateBlockAlignedLoad(lo_GEP), iBitBlock);
664        Value * c_hi = b->CreateBitCast(b->CreateBlockAlignedLoad(hi_GEP), iBitBlock);
665        Value * lo_shift = b->CreateZExt(b->CreateURem(carryPosition, blockWidth), iBitBlock);
666        Value * hi_shift = b->CreateZExt(b->CreateSub(blockWidth_1, b->CreateURem(carryBlockEndPos, blockWidth)), iBitBlock);
667        Value * carryIn = b->CreateOr(b->CreateLShr(c_lo, lo_shift), b->CreateShl(c_hi, hi_shift));
668        Value * carryOut, * result;
669        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
670        carryOut = b->CreateBitCast(carryOut, iBitBlock);
671        Value * adv = b->mvmd_extract(sizeof(size_t) * 8, b->simd_popcount(b->getBitBlockWidth(), index_strm), 0);
672        b->setScalarField("IndexedAdvancePosition" + std::to_string(mIndexedLongAdvanceIndex), b->CreateAdd(carryPosition, adv));
673        Value * carryOutPosition = b->CreateAdd(carryPosition, b->getSize(shiftAmount));
674        Value * carryOutEndPos = b->CreateAdd(carryOutPosition, blockWidth_1);
675        carryBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryOutPosition, blockWidth), carryQueueBlocks), b->getInt32Ty());
676        carryEndBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryOutEndPos, blockWidth), carryQueueBlocks), b->getInt32Ty());
677        lo_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryBlock});
678        hi_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryEndBlock});
679        lo_shift = b->CreateZExt(b->CreateURem(carryOutPosition, blockWidth), iBitBlock);
680        hi_shift = b->CreateZExt(b->CreateSub(blockWidth_1, b->CreateURem(carryOutEndPos, blockWidth)), iBitBlock);
681        c_lo = b->CreateOr(b->CreateBitCast(b->CreateBlockAlignedLoad(lo_GEP), iBitBlock), b->CreateShl(carryOut, lo_shift));
682        c_hi = b->CreateLShr(carryOut, hi_shift);
683        b->CreateBlockAlignedStore(b->CreateBitCast(c_lo, b->getBitBlockType()), lo_GEP);
684        b->CreateBlockAlignedStore(b->CreateBitCast(c_hi, b->getBitBlockType()), hi_GEP);
[5711]685        mIndexedLongAdvanceIndex++;
[5715]686        mCurrentFrameIndex++;
687        // Now handle the summary.
688        if (mIfDepth > 0) {
689            const auto summaryBlocks = ceil_udiv(shiftAmount, b->getBitBlockWidth());
690            const auto summarySize = ceil_udiv(summaryBlocks, b->getBitBlockWidth());
691            for (unsigned i = 0; i < summarySize; i++) {
692                // All ones summary for now.
693                b->CreateBlockAlignedStore(b->allOnes(), b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(summaryFrame), b->getInt32(i)}));
694            }
695        }
696        return result;
[5708]697    }
[5705]698}
699
[5708]700
[4925]701/** ------------------------------------------------------------------------------------------------------------- *
[5227]702 * @brief longAdvanceCarryInCarryOut
[4925]703 ** ------------------------------------------------------------------------------------------------------------- */
[5712]704inline Value * CarryManager::longAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const value, const unsigned shiftAmount) {
[4925]705
[5311]706    assert (mHasLongAdvance);
[5371]707    assert (shiftAmount >= LONG_ADVANCE_BREAKPOINT);
[5706]708    assert (value);
[4925]709
[5712]710    const auto blockWidth = b->getBitBlockWidth();
711    Type * const streamTy = b->getIntNTy(blockWidth);
[5227]712
[5586]713    Value * indices[3];
714
[5712]715    indices[0] = b->getInt32(0);
[5586]716
[5371]717    if (mIfDepth > 0) {
[5586]718        if (shiftAmount > blockWidth) {
719
[5706]720            // TODO: once CEILING(shiftAmount / 256) > 2, consider using a half-adder/subtractor strategy?
721
[5712]722            Value * carry = b->CreateZExt(b->bitblock_any(value), streamTy);
[5586]723            const auto summaryBlocks = ceil_udiv(shiftAmount, blockWidth);
724            const auto summarySize = ceil_udiv(summaryBlocks, blockWidth);
[5712]725            VectorType * const bitBlockTy = b->getBitBlockType();
[5586]726            IntegerType * const laneTy = cast<IntegerType>(bitBlockTy->getVectorElementType());
727            const auto laneWidth = laneTy->getIntegerBitWidth();
728
729            assert (summarySize > 0);
730            assert (is_power_2(laneWidth));
731
[5712]732            indices[1] = b->getInt32(mCurrentFrameIndex++);
[5586]733
734            for (unsigned i = 1;;++i) {
735
736                assert (i <= summarySize);
737
[5712]738                indices[2] = b->getInt32(i - 1);
[5586]739
[5712]740                Value * const ptr = b->CreateGEP(mCurrentFrame, indices);
741                Value * const prior = b->CreateBitCast(b->CreateBlockAlignedLoad(ptr), streamTy);
[5586]742
743                Value * advanced = nullptr;
744                if (LLVM_LIKELY(summaryBlocks < laneWidth)) {
[5712]745                    advanced = b->CreateOr(b->CreateShl(prior, 1), carry);
746                    carry = b->CreateLShr(prior, summaryBlocks - 1);
[5586]747                } else {
[5712]748                    std::tie(advanced, carry) = b->bitblock_advance(prior, carry, 1);
[5586]749                }
[5712]750                Value * stream = b->CreateBitCast(advanced, bitBlockTy);
[5371]751                if (LLVM_LIKELY(i == summarySize)) {
[5586]752                    const auto n = bitBlockTy->getVectorNumElements();
[6275]753                    Constant * mask[n];
[5586]754                    const auto m = udiv(summaryBlocks, laneWidth);
755                    if (m) {
756                        std::fill_n(mask, m, ConstantInt::getAllOnesValue(laneTy));
757                    }
758                    mask[m] = ConstantInt::get(laneTy, (1UL << (summaryBlocks & (laneWidth - 1))) - 1UL);
759                    if (n > m) {
760                        std::fill_n(mask + m + 1, n - m, UndefValue::get(laneTy));
761                    }
[5712]762                    stream = b->CreateAnd(stream, ConstantVector::get(ArrayRef<Constant *>(mask, n)));
763                    addToCarryOutSummary(b, stream);
764                    b->CreateBlockAlignedStore(stream, ptr);
[5371]765                    break;
766                }
[5712]767                addToCarryOutSummary(b, stream);
768                b->CreateBlockAlignedStore(stream, ptr);
[4715]769            }
[5586]770
[5371]771        } else if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
[5712]772            addToCarryOutSummary(b, value);
[4710]773        }
774    }
[5586]775
[5712]776    indices[1] = b->getInt32(mCurrentFrameIndex++);
[5586]777
[5371]778    // special case using a single buffer entry and the carry_out value.
[5586]779    if (LLVM_LIKELY((shiftAmount < blockWidth) && (mLoopDepth == 0))) {
780
[5712]781        indices[2] = indices[0]; // b->getInt32(0)
[5586]782        assert (cast<ConstantInt>(indices[2])->isNullValue());
783
[5712]784        Value * const buffer = b->CreateGEP(mCurrentFrame, indices);
785        assert (buffer->getType()->getPointerElementType() == b->getBitBlockType());
786        Value * carryIn = b->CreateBlockAlignedLoad(buffer);
[5586]787
[5712]788        b->CreateBlockAlignedStore(value, buffer);
[5371]789        /* Very special case - no combine */
[5586]790        if (LLVM_UNLIKELY(shiftAmount == blockWidth)) {
[5712]791            return b->CreateBitCast(carryIn, b->getBitBlockType());
[5371]792        }
[5712]793        Value* block0_shr = b->CreateLShr(b->CreateBitCast(carryIn, streamTy), blockWidth - shiftAmount);
794        Value* block1_shl = b->CreateShl(b->CreateBitCast(value, streamTy), shiftAmount);
795        return b->CreateBitCast(b->CreateOr(block1_shl, block0_shr), b->getBitBlockType());
[5371]796    } else { //
[5586]797        const unsigned blockShift = shiftAmount & (blockWidth - 1);
798        const unsigned summaryBlocks = ceil_udiv(shiftAmount, blockWidth);
799
[5371]800        // Create a mask to implement circular buffer indexing
[5712]801        Value * indexMask = b->getSize(nearest_pow2(summaryBlocks) - 1);
802        Value * blockIndex = b->getScalarField("CarryBlockIndex");
[5586]803
[5712]804        Value * carryIndex0 = b->CreateSub(blockIndex, b->getSize(summaryBlocks));
805        indices[2] = b->CreateAnd(carryIndex0, indexMask);
806        Value * const carryInPtr = b->CreateGEP(mCurrentFrame, indices);
807        Value * carryIn = b->CreateBlockAlignedLoad(carryInPtr);
[4925]808
[5712]809        indices[2] = b->CreateAnd(blockIndex, indexMask);
810        Value * const carryOutPtr = b->CreateGEP(mCurrentFrame, indices);
811        assert (carryIn->getType() == b->getBitBlockType());
[5366]812
[5371]813        // If the long advance is an exact multiple of BitBlockWidth, we simply return the oldest
814        // block in the long advance carry data area.
815        if (LLVM_UNLIKELY(blockShift == 0)) {
[5712]816            b->CreateBlockAlignedStore(value, carryOutPtr);
[5371]817            return carryIn;
818        } else { // Otherwise we need to combine data from the two oldest blocks.
[5712]819            Value * const carryIndex1 = b->CreateSub(blockIndex, b->getSize(summaryBlocks - 1));
820            indices[2] = b->CreateAnd(carryIndex1, indexMask);
[5586]821
[5712]822            Value * const carryInPtr2 = b->CreateGEP(mCurrentFrame, indices);
823            Value * const carryIn2 = b->CreateBlockAlignedLoad(carryInPtr2);
[5706]824            assert (carryOutPtr->getType()->getPointerElementType() == value->getType());
[5712]825            b->CreateBlockAlignedStore(value, carryOutPtr);
[5586]826
[5712]827            Value * const b0 = b->CreateLShr(b->CreateBitCast(carryIn, streamTy), blockWidth - blockShift);
828            Value * const b1 = b->CreateShl(b->CreateBitCast(carryIn2, streamTy), blockShift);
829            return b->CreateBitCast(b->CreateOr(b1, b0), b->getBitBlockType());
[5371]830        }
[5227]831    }
832}
[4925]833
[5227]834/** ------------------------------------------------------------------------------------------------------------- *
835 * @brief getNextCarryIn
836 ** ------------------------------------------------------------------------------------------------------------- */
[5712]837Value * CarryManager::getNextCarryIn(const std::unique_ptr<kernel::KernelBuilder> & b) {
[5227]838    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
[5366]839    if (mLoopDepth == 0) {
[5712]840        mCarryPackPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex)});
[5366]841    } else {
[5712]842        mCarryPackPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), mLoopSelector});
[5366]843    }
[5712]844    Type * const carryTy = b->getBitBlockType();
[5435]845    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
[5712]846    Value * const carryIn = b->CreateBlockAlignedLoad(mCarryPackPtr);
[5233]847    if (mLoopDepth > 0) {
[5712]848        b->CreateBlockAlignedStore(Constant::getNullValue(carryTy), mCarryPackPtr);
[5233]849    }
850    return carryIn;
[5227]851}
[4925]852
[5227]853/** ------------------------------------------------------------------------------------------------------------- *
854 * @brief setNextCarryOut
855 ** ------------------------------------------------------------------------------------------------------------- */
[5712]856void CarryManager::setNextCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, Value * carryOut) {
857    Type * const carryTy = b->getBitBlockType();
[5366]858    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
[5712]859    carryOut = b->CreateBitCast(carryOut, carryTy);
[5368]860    if (mCarryInfo->hasSummary()) {
[5712]861        addToCarryOutSummary(b, carryOut);
[5227]862    }
[5371]863    if (mLoopDepth != 0) {
[5712]864        mCarryPackPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), mNextLoopSelector});
[5368]865        if (LLVM_LIKELY(!mCarryInfo->nonCarryCollapsingMode())) {
[5712]866            Value * accum = b->CreateBlockAlignedLoad(mCarryPackPtr);
867            carryOut = b->CreateOr(carryOut, accum);
[5368]868        }
[5227]869    }
[5366]870    ++mCurrentFrameIndex;
[5435]871    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
[5712]872    b->CreateBlockAlignedStore(carryOut, mCarryPackPtr);
[4644]873}
874
[4925]875/** ------------------------------------------------------------------------------------------------------------- *
[5366]876 * @brief readCarryInSummary
[4925]877 ** ------------------------------------------------------------------------------------------------------------- */
[5712]878Value * CarryManager::readCarryInSummary(const std::unique_ptr<kernel::KernelBuilder> & b, ConstantInt * index) const {
[5366]879    assert (mCarryInfo->hasSummary());
880    unsigned count = 2;
881    if (LLVM_UNLIKELY(mCarryInfo->hasBorrowedSummary())) {
882        Type * frameTy = mCurrentFrame->getType()->getPointerElementType();
883        count = 1;
884        while (frameTy->isStructTy()) {
885            ++count;
[5586]886            assert (frameTy->getStructNumElements() > 0);
[5366]887            frameTy = frameTy->getStructElementType(0);
888        }
[5227]889    }
[5366]890    const unsigned length = (mLoopDepth == 0) ? count : (count + 1);
891    Value * indicies[length];
[5712]892    std::fill(indicies, indicies + count - 1, b->getInt32(0));
[5366]893    indicies[count - 1] = index;
[5368]894    if (mLoopDepth != 0) {
[5366]895        indicies[count] = mLoopSelector;
896    }
[5371]897
[5366]898    ArrayRef<Value *> ar(indicies, length);
[5712]899    Value * const ptr = b->CreateGEP(mCurrentFrame, ar);
900    Value * const summary = b->CreateBlockAlignedLoad(ptr);
[5368]901    if (mLoopDepth != 0 && mCarryInfo->hasExplicitSummary()) {
[5712]902        Type * const carryTy = b->getBitBlockType();
903        b->CreateBlockAlignedStore(Constant::getNullValue(carryTy), ptr);
[5366]904    }
905    return summary;
[4720]906}
[4726]907
[4925]908/** ------------------------------------------------------------------------------------------------------------- *
[5366]909 * @brief writeCarryOutSummary
[4925]910 ** ------------------------------------------------------------------------------------------------------------- */
[5712]911inline void CarryManager::writeCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const summary, ConstantInt * index) const {
[5366]912    Value * ptr = nullptr;
913    assert (mCarryInfo->hasExplicitSummary());
914    if (mLoopDepth > 0) {
[5712]915        ptr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), index, mNextLoopSelector});
[5366]916    } else {
[5712]917        ptr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), index});
[5366]918    }
[5712]919    b->CreateBlockAlignedStore(summary, ptr);
[5227]920}
921
922/** ------------------------------------------------------------------------------------------------------------- *
[5366]923 * @brief addToCarryOutSummary
924 ** ------------------------------------------------------------------------------------------------------------- */
[5712]925inline void CarryManager::addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const value) {
[6275]926    assert ("cannot add null summary value!" && value);
[5368]927    assert ("summary stack is empty!" && !mCarrySummaryStack.empty());
928    assert (mCarryInfo->hasSummary());
[5712]929    mCarrySummaryStack.back() = b->CreateOr(value, mCarrySummaryStack.back());
[5366]930}
931
932/** ------------------------------------------------------------------------------------------------------------- *
[5227]933 * @brief enumerate
934 ** ------------------------------------------------------------------------------------------------------------- */
[5435]935unsigned CarryManager::getScopeCount(const PabloBlock * const scope, unsigned index) {
936    for (const Statement * stmt : *scope) {
[5227]937        if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
[5340]938            index = getScopeCount(cast<Branch>(stmt)->getBody(), index);
[5227]939        }
[4925]940    }
[5340]941    return index + 1;
[4925]942}
943
944/** ------------------------------------------------------------------------------------------------------------- *
[5353]945 * @brief hasIterationSpecificAssignment
[4925]946 ** ------------------------------------------------------------------------------------------------------------- */
[5706]947bool isNonRegularLanguage(const PabloBlock * const scope) {
948    if (const Branch * br = scope->getBranch()) {
949        return !br->isRegular();
[5227]950    }
951    return false;
[4925]952}
953
[5586]954static bool hasNonEmptyCarryStruct(const Type * const frameTy) {
955    if (frameTy->isStructTy()) {
956        for (unsigned i = 0; i < frameTy->getStructNumElements(); ++i) {
957            if (hasNonEmptyCarryStruct(frameTy->getStructElementType(i))) {
958                return true;
959            }
960        }
961        return false;
962    }
963    return true;
964}
965
966static bool hasNonEmptyCarryStruct(const std::vector<Type *> & state) {
967    for (const Type * const frameTy : state) {
968        if (hasNonEmptyCarryStruct(frameTy)) {
969            return true;
970        }
971    }
972    return false;
973}
974
[5227]975/** ------------------------------------------------------------------------------------------------------------- *
976 * @brief analyse
977 ** ------------------------------------------------------------------------------------------------------------- */
[5712]978StructType * CarryManager::analyse(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope,
[5493]979                                   const unsigned ifDepth, const unsigned loopDepth, const bool isNestedWithinNonCarryCollapsingLoop) {
[5435]980    assert ("scope cannot be null!" && scope);
[5493]981    assert ("entry scope (and only the entry scope) must be in scope 0"
[5836]982            && (mCarryScopes == 0 ? (scope == mKernel->getEntryScope()) : (scope != mKernel->getEntryScope())));
[5347]983    assert (mCarryScopes < mCarryMetadata.size());
[5712]984    Type * const carryTy = b->getBitBlockType();
985    Type * const blockTy = b->getBitBlockType();
[4925]986
[5366]987    const unsigned carryScopeIndex = mCarryScopes++;
[5706]988    const bool nonCarryCollapsingMode = isNonRegularLanguage(scope);
[5435]989    Type * const carryPackType = (loopDepth == 0) ? carryTy : ArrayType::get(carryTy, 2);
[5347]990    std::vector<Type *> state;
[5435]991    for (const Statement * stmt : *scope) {
[5708]992        if (LLVM_UNLIKELY(isa<Advance>(stmt) || isa<IndexedAdvance>(stmt))) {
993            int64_t amount;
994            if (isa<Advance>(stmt)) amount = cast<Advance>(stmt)->getAmount();
995            else amount = cast<IndexedAdvance>(stmt)->getAmount();
[5366]996            Type * type = carryPackType;
[5371]997            if (LLVM_UNLIKELY(amount >= LONG_ADVANCE_BREAKPOINT)) {
[5712]998                const auto blockWidth = b->getBitBlockWidth();
[5586]999                const auto blocks = ceil_udiv(amount, blockWidth);
[5716]1000                type = ArrayType::get(blockTy, nearest_pow2(blocks + (isa<IndexedAdvance>(stmt) ? 1:0) + ((loopDepth != 0) ? 1 : 0)));
[5586]1001                if (LLVM_UNLIKELY(ifDepth > 0 && blocks != 1)) {
1002                    const auto summarySize = ceil_udiv(blocks, blockWidth);
[5371]1003                    // 1 bit will mark the presense of any bit in each block.
[5586]1004                    state.push_back(ArrayType::get(blockTy, summarySize));
[5227]1005                }
[5708]1006                mHasLongAdvance = true;
1007                if (isa<IndexedAdvance>(stmt)) mIndexedLongAdvanceTotal++;
[5227]1008            }
[5366]1009            state.push_back(type);
[5329]1010        } else if (LLVM_UNLIKELY(isNonAdvanceCarryGeneratingStatement(stmt))) {
[5227]1011            state.push_back(carryPackType);
1012        } else if (LLVM_UNLIKELY(isa<If>(stmt))) {
[5712]1013            state.push_back(analyse(b, cast<If>(stmt)->getBody(), ifDepth + 1, loopDepth, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
[5227]1014        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
[5307]1015            mHasLoop = true;
[5712]1016            state.push_back(analyse(b, cast<While>(stmt)->getBody(), ifDepth, loopDepth + 1, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
[5227]1017        }
1018    }
[5366]1019    // Build the carry state struct and add the summary pack if needed.
1020    CarryData & cd = mCarryMetadata[carryScopeIndex];
[5227]1021    StructType * carryState = nullptr;
[5354]1022    CarryData::SummaryType summaryType = CarryData::NoSummary;
[5227]1023    if (LLVM_UNLIKELY(state.empty())) {
[5712]1024        carryState = StructType::get(b->getContext());
[5227]1025    } else {
[5586]1026        // do we have a summary or a sequence of nested empty structs?
1027        if (hasNonEmptyCarryStruct(state)) {
1028            if (dyn_cast_or_null<If>(scope->getBranch()) || nonCarryCollapsingMode || isNestedWithinNonCarryCollapsingLoop) {
1029                if (LLVM_LIKELY(state.size() > 1)) {
1030                    summaryType = CarryData::ExplicitSummary;
1031                    // NOTE: summaries are stored differently depending whether we're entering an If or While branch. With an If branch, they
1032                    // preceed the carry state data and with a While loop they succeed it. This is to help cache prefectching performance.
1033                    state.insert(isa<If>(scope->getBranch()) ? state.begin() : state.end(), carryPackType);
1034                } else {
1035                    summaryType = CarryData::ImplicitSummary;
1036                    if (hasNonEmptyCarryStruct(state[0])) {
1037                        summaryType = CarryData::BorrowedSummary;
1038                    }
[5227]1039                }
[5586]1040            }
[5227]1041        }
[5712]1042        carryState = StructType::get(b->getContext(), state);
[5361]1043        // If we're in a loop and cannot use collapsing carry mode, convert the carry state struct into a capacity,
1044        // carry state pointer, and summary pointer struct.
1045        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
[5493]1046            mHasNonCarryCollapsingLoops = true;
[5733]1047            carryState = StructType::get(b->getContext(), {b->getSizeTy(), carryState->getPointerTo(), carryTy->getPointerTo()});
[5493]1048            assert (isDynamicallyAllocatedType(carryState));
[5227]1049        }
[5366]1050        cd.setNonCollapsingCarryMode(nonCarryCollapsingMode);
[4712]1051    }
[5354]1052    cd.setSummaryType(summaryType);
[5227]1053    return carryState;
[4710]1054}
1055
[5267]1056/** ------------------------------------------------------------------------------------------------------------- *
1057 * @brief constructor
1058 ** ------------------------------------------------------------------------------------------------------------- */
[5435]1059CarryManager::CarryManager() noexcept
1060: mKernel(nullptr)
[5307]1061, mCurrentFrame(nullptr)
[5267]1062, mCurrentFrameIndex(0)
1063, mCurrentScope(nullptr)
1064, mCarryInfo(nullptr)
[5366]1065, mNextSummaryTest(nullptr)
[5267]1066, mIfDepth(0)
[5311]1067, mHasLongAdvance(false)
[5708]1068, mIndexedLongAdvanceTotal(0)
1069, mIndexedLongAdvanceIndex(0)
[5493]1070, mHasNonCarryCollapsingLoops(false)
[5307]1071, mHasLoop(false)
1072, mLoopDepth(0)
[5340]1073, mLoopSelector(nullptr)
[5366]1074, mNextLoopSelector(nullptr)
[5371]1075, mCarryPackPtr(nullptr)
[5340]1076, mCarryScopes(0) {
[5267]1077
[4712]1078}
1079
[5267]1080}
Note: See TracBrowser for help on using the repository browser.