source: icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp @ 5842

Last change on this file since 5842 was 5836, checked in by nmedfort, 19 months ago

Added PabloBlock/Builder? createScope() methods + minor code changes.

File size: 53.8 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "carry_manager.h"
8#include <pablo/carry_data.h>
9#include <pablo/codegenstate.h>
10#include <llvm/IR/BasicBlock.h>
11#include <llvm/IR/DerivedTypes.h>
12#include <llvm/Transforms/Utils/Local.h>
13#include <pablo/branch.h>
14#include <pablo/pe_advance.h>
15#include <pablo/pe_scanthru.h>
16#include <pablo/pe_matchstar.h>
17#include <pablo/pe_var.h>
18#include <kernels/kernel_builder.h>
19#include <toolchain/toolchain.h>
20#include <array>
21
22using namespace llvm;
23
24namespace pablo {
25
26inline static bool is_power_2(const unsigned n) {
27    return (n && ((n & (n - 1)) == 0));
28}
29
30inline static unsigned ceil_log2(const unsigned v) {
31    assert ("log2(0) is undefined!" && v != 0);
32    return (sizeof(unsigned) * CHAR_BIT) - __builtin_clz(v - 1U);
33}
34
35inline static unsigned floor_log2(const unsigned v) {
36    assert ("log2(0) is undefined!" && v != 0);
37    return ((sizeof(unsigned) * CHAR_BIT) - 1U) - __builtin_clz(v);
38}
39
40inline static unsigned nearest_pow2(const unsigned v) {
41    assert (v > 0 && v < (UINT32_MAX / 2));
42    return (v < 2) ? 1 : (1U << ceil_log2(v));
43}
44
45inline static unsigned nearest_multiple(const unsigned n, const unsigned m) {
46    assert (is_power_2(m));
47    const unsigned r = (n + m - 1U) & -m;
48    assert (r >= n);
49    return r;
50}
51
52inline static bool is_multiple_of(const unsigned n, const unsigned m) {
53    return nearest_multiple(n, m) == n;
54}
55
56inline static unsigned udiv(const unsigned x, const unsigned y) {
57    assert (is_power_2(y));
58    const unsigned z = x >> floor_log2(y);
59    assert (z == (x / y));
60    return z;
61}
62
63inline static unsigned ceil_udiv(const unsigned x, const unsigned y) {
64    return (((x - 1) | (y - 1)) + 1) / y;
65}
66
67using TypeId = PabloAST::ClassTypeId;
68
69inline static bool isNonAdvanceCarryGeneratingStatement(const Statement * const stmt) {
70    return isa<CarryProducingStatement>(stmt) && !isa<Advance>(stmt) && !isa<IndexedAdvance>(stmt);
71}
72
73#define LONG_ADVANCE_BREAKPOINT 64
74
75/** ------------------------------------------------------------------------------------------------------------- *
76 * @brief initializeCarryData
77 ** ------------------------------------------------------------------------------------------------------------- */
78void CarryManager::initializeCarryData(const std::unique_ptr<kernel::KernelBuilder> & b, PabloKernel * const kernel) {
79
80    // Each scope constructs its own CarryData struct, which will be added to the final "carries" struct
81    // that is added to the Kernel. The scope index will indicate which struct to access.
82
83    // A CarryData struct either contains an array of CarryPackBlocks or an integer indicating the capacity of
84    // the variable length CarryData struct and pointer. A variable length CarryData struct is required whenever
85    // the streams accessed by a loop could vary between iterations. When resizing a CarryData struct for a
86    // particular loop, the current loop struct and all nested structs need to be resized. This accommodates
87    // the fact every pablo While loop must be executed at least once.
88
89    // A nested loop may also contain a variable length CarryData struct
90
91    // To determine whether we require a variable length CarryData struct, we test the escaped variables of
92    // each loop branch to see whether they are used as the index parameter of a nested Extract statement.
93    // Any scope that requires variable length CarryData, requires that all nested branches have a unique
94    // set of carries for that iteration.
95
96    assert (mKernel == nullptr);
97    mCurrentScope = kernel->getEntryScope();
98    mKernel = kernel;
99
100    mCarryScopes = 0;
101
102    mCarryMetadata.resize(getScopeCount(mCurrentScope));
103
104    Type * const carryStateTy = analyse(b, mCurrentScope);
105
106    kernel->addScalar(carryStateTy, "carries");
107
108    if (mHasLoop) {
109        kernel->addScalar(b->getInt32Ty(), "selector");
110    }
111    if (mHasLongAdvance) {
112        kernel->addScalar(b->getSizeTy(), "CarryBlockIndex");
113    }
114    for (unsigned i = 0; i < mIndexedLongAdvanceTotal; i++) {
115        kernel->addScalar(b->getSizeTy(), "IndexedAdvancePosition" + std::to_string(i));
116    }
117}
118
119bool isDynamicallyAllocatedType(const Type * const ty) {
120    if (isa<StructType>(ty) && ty->getStructNumElements() == 3) {
121        return (ty->getStructElementType(1)->isPointerTy() && ty->getStructElementType(2)->isPointerTy() && ty->getStructElementType(0)->isIntegerTy());
122    }
123    return false;
124}
125
126bool containsDynamicallyAllocatedType(const Type * const ty) {
127    if (isa<StructType>(ty)) {
128        for (unsigned i = 0; i < ty->getStructNumElements(); ++i) {
129            if (isDynamicallyAllocatedType(ty->getStructElementType(i))) {
130                return true;
131            }
132        }
133    }
134    return false;
135}
136
137void freeDynamicallyAllocatedMemory(const std::unique_ptr<kernel::KernelBuilder> & idb, Value * const frame) {
138    StructType * const ty = cast<StructType>(frame->getType()->getPointerElementType());
139    std::array<Value *, 3> indices;
140    indices[0] = idb->getInt32(0);
141    for (unsigned i = 0; i < ty->getStructNumElements(); ++i) {
142        if (isDynamicallyAllocatedType(ty->getStructElementType(i))) {
143            indices[1] = idb->getInt32(i);
144            indices[2] = idb->getInt32(1);
145            Value * const innerFrame = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
146            if (containsDynamicallyAllocatedType(innerFrame->getType())) {
147                indices[2] = indices[0];
148                Value *  const count = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
149                BasicBlock * const entry = idb->GetInsertBlock();
150                BasicBlock * const cond = idb->CreateBasicBlock("freeCarryDataCond");
151                BasicBlock * const body = idb->CreateBasicBlock("freeCarryDataLoop");
152                BasicBlock * const exit = idb->CreateBasicBlock("freeCarryDataExit");
153                idb->CreateBr(cond);
154                idb->SetInsertPoint(cond);
155                PHINode * const index = idb->CreatePHI(count->getType(), 2);
156                index->addIncoming(ConstantInt::getNullValue(count->getType()), entry);
157                Value * test = idb->CreateICmpNE(index, count);
158                idb->CreateCondBr(test, body, exit);
159                idb->SetInsertPoint(body);
160                freeDynamicallyAllocatedMemory(idb, idb->CreateGEP(innerFrame, index));
161                index->addIncoming(idb->CreateAdd(index, ConstantInt::get(count->getType(), 1)), body);
162                idb->CreateBr(cond);
163                idb->SetInsertPoint(exit);
164            }
165            idb->CreateFree(innerFrame);
166            indices[2] = idb->getInt32(2);
167            Value *  const summary = idb->CreateLoad(idb->CreateGEP(frame, ArrayRef<Value*>(indices.data(), 3)));
168            idb->CreateFree(summary);
169        }
170    }
171}
172
173/** ------------------------------------------------------------------------------------------------------------- *
174 * @brief releaseCarryData
175 ** ------------------------------------------------------------------------------------------------------------- */
176void CarryManager::releaseCarryData(const std::unique_ptr<kernel::KernelBuilder> & idb) {
177    if (mHasNonCarryCollapsingLoops) {
178        freeDynamicallyAllocatedMemory(idb, idb->getScalarFieldPtr("carries"));
179    }
180}
181
182/** ------------------------------------------------------------------------------------------------------------- *
183 * @brief clearCarryState
184 ** ------------------------------------------------------------------------------------------------------------- */
185void CarryManager::clearCarryData(const std::unique_ptr<kernel::KernelBuilder> & idb) {
186
187
188
189}
190
191/** ------------------------------------------------------------------------------------------------------------- *
192 * @brief initializeCodeGen
193 ** ------------------------------------------------------------------------------------------------------------- */
194void CarryManager::initializeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & b) {
195
196    assert(!mCarryMetadata.empty());
197    mCarryInfo = &mCarryMetadata[0];
198    assert (!mCarryInfo->hasSummary());
199
200    mCurrentFrame = b->getScalarFieldPtr("carries");
201    mCurrentFrameIndex = 0;
202    mCarryScopes = 0;
203    mCarryScopeIndex.push_back(0);
204
205    assert (mCarryFrameStack.empty());
206
207    assert (mCarrySummaryStack.empty());
208
209    Type * const carryTy = b->getBitBlockType();
210
211    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
212
213    if (mHasLoop) {       
214        mLoopSelector = b->getScalarField("selector");
215        mNextLoopSelector = b->CreateXor(mLoopSelector, ConstantInt::get(mLoopSelector->getType(), 1));
216    }
217
218}
219
220/** ------------------------------------------------------------------------------------------------------------- *
221 * @brief finalizeCodeGen
222 ** ------------------------------------------------------------------------------------------------------------- */
223void CarryManager::finalizeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & b) {
224    if (mHasLoop) {
225        b->setScalarField("selector", mNextLoopSelector);
226    }
227    if (mHasLongAdvance) {
228        Value * idx = b->getScalarField("CarryBlockIndex");
229        idx = b->CreateAdd(idx, b->getSize(1));
230        b->setScalarField("CarryBlockIndex", idx);
231    }
232    assert (mCarryFrameStack.empty());   
233    assert ("base summary value was deleted!" && mCarrySummaryStack.size() == 1);
234    assert ("base summary value was overwritten with non-zero value!" && isa<Constant>(mCarrySummaryStack[0]) && cast<Constant>(mCarrySummaryStack[0])->isNullValue());
235    mCarrySummaryStack.clear();
236    assert (mCarryScopeIndex.size() == 1);
237    mCarryScopeIndex.clear();
238}
239
240/** ------------------------------------------------------------------------------------------------------------- *
241 * @brief enterLoopScope
242 ** ------------------------------------------------------------------------------------------------------------- */
243void CarryManager::enterLoopScope(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope) {
244    assert (scope);
245    assert (mHasLoop);
246    ++mLoopDepth;
247    enterScope(b, scope);
248}
249
250/** ------------------------------------------------------------------------------------------------------------- *
251 * @brief enterLoopBody
252 ** ------------------------------------------------------------------------------------------------------------- */
253void CarryManager::enterLoopBody(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const entryBlock) {
254    if (mCarryInfo->hasSummary()) {
255        Type * const carryTy = b->getBitBlockType();
256        PHINode * phiCarryOutSummary = b->CreatePHI(carryTy, 2, "summary");
257        assert (!mCarrySummaryStack.empty());
258        phiCarryOutSummary->addIncoming(mCarrySummaryStack.back(), entryBlock);
259        // Replace the incoming carry summary with the phi node and add the phi node to the stack  so that we can
260        // properly OR it into the outgoing summary value.
261        // NOTE: this may change the base summary value; when exiting to the base scope, replace this summary with
262        // a null value to prevent subsequent nested scopes from inheriting the summary of this scope.
263        mCarrySummaryStack.back() = phiCarryOutSummary;
264        mCarrySummaryStack.push_back(phiCarryOutSummary);
265    }
266    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
267
268        assert (mCarryInfo->hasSummary());
269
270        Type * const int8PtrTy = b->getInt8PtrTy();
271        Type * const carryTy = b->getBitBlockType();
272        PointerType * const carryPtrTy = carryTy->getPointerTo();
273
274        // Check whether we need to resize the carry state
275        PHINode * index = b->CreatePHI(b->getSizeTy(), 2);
276        mLoopIndicies.push_back(index);
277        index->addIncoming(b->getSize(0), entryBlock);
278        Value * capacityPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(0)});
279        Value * capacity = b->CreateLoad(capacityPtr, "capacity");
280        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
281        Value * arrayPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(1)});
282        Value * array = b->CreateLoad(arrayPtr, "array");
283        BasicBlock * const entry = b->GetInsertBlock();
284        BasicBlock * const resizeCarryState = b->CreateBasicBlock("ResizeCarryState");
285        BasicBlock * const reallocExisting = b->CreateBasicBlock("ReallocExisting");
286        BasicBlock * const createNew = b->CreateBasicBlock("CreateNew");
287        BasicBlock * const resumeKernel = b->CreateBasicBlock("ResumeKernel");
288        b->CreateLikelyCondBr(b->CreateICmpNE(index, capacity), resumeKernel, resizeCarryState);
289
290        // RESIZE CARRY BLOCK
291        b->SetInsertPoint(resizeCarryState);
292        const auto BlockWidth = b->getBitBlockWidth() / 8;
293        const auto Log2BlockWidth = floor_log2(BlockWidth);
294        Constant * const carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(array->getType()->getPointerElementType()), b->getSizeTy(), false);
295        Value * const summaryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(2)});
296        Value * const hasCarryState = b->CreateICmpNE(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
297        b->CreateLikelyCondBr(hasCarryState, reallocExisting, createNew);
298
299        // REALLOCATE EXISTING
300        b->SetInsertPoint(reallocExisting);
301        Value * const capacitySize = b->CreateMul(capacity, carryStateWidth);
302        Value * const newCapacitySize = b->CreateShl(capacitySize, 1); // x 2
303        Value * const newArray = b->CreateCacheAlignedMalloc(newCapacitySize);
304        b->CreateMemCpy(newArray, array, capacitySize, b->getCacheAlignment());
305        b->CreateFree(array);
306        b->CreateStore(newArray, arrayPtr);
307        Value * const startNewArrayPtr = b->CreateGEP(b->CreatePointerCast(newArray, int8PtrTy), capacitySize);
308        b->CreateMemZero(startNewArrayPtr, capacitySize, BlockWidth);
309        Value * const newCapacity = b->CreateShl(capacity, 1);
310        b->CreateStore(newCapacity, capacityPtr);
311        Value * const summary = b->CreateLoad(summaryPtr, false);
312        Value * const summarySize = b->CreateShl(b->CreateAdd(b->CreateCeilLog2(capacity), ONE), Log2BlockWidth + 1);
313        Constant * const additionalSpace = b->getSize(2 * BlockWidth);
314        Value * const newSummarySize = b->CreateAdd(summarySize, additionalSpace);
315        Value * const newSummary = b->CreateBlockAlignedMalloc(newSummarySize);
316        b->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
317        b->CreateFree(summary);
318        b->CreateStore(b->CreatePointerCast(newSummary, carryPtrTy), summaryPtr);
319        Value * const startNewSummaryPtr = b->CreateGEP(b->CreatePointerCast(newSummary, int8PtrTy), summarySize);
320        b->CreateMemZero(startNewSummaryPtr, additionalSpace, BlockWidth);
321        b->CreateBr(resumeKernel);
322
323        // CREATE NEW
324        b->SetInsertPoint(createNew);
325        Constant * const initialLog2Capacity = b->getInt64(4);
326        Constant * const initialCapacity = ConstantExpr::getShl(ONE, initialLog2Capacity);
327        b->CreateStore(initialCapacity, capacityPtr);
328        Constant * const initialCapacitySize = ConstantExpr::getMul(initialCapacity, carryStateWidth);
329        Value * initialArray = b->CreateCacheAlignedMalloc(initialCapacitySize);
330        b->CreateMemZero(initialArray, initialCapacitySize, BlockWidth);
331        initialArray = b->CreatePointerCast(initialArray, array->getType());
332        b->CreateStore(initialArray, arrayPtr);
333        Constant * initialSummarySize = ConstantExpr::getShl(ConstantExpr::getAdd(initialLog2Capacity, b->getInt64(1)), b->getInt64(Log2BlockWidth + 1));
334        Value * initialSummary = b->CreateBlockAlignedMalloc(initialSummarySize);
335        b->CreateMemZero(initialSummary, initialSummarySize, BlockWidth);
336        initialSummary = b->CreatePointerCast(initialSummary, carryPtrTy);
337        b->CreateStore(initialSummary, summaryPtr);
338        b->CreateBr(resumeKernel);
339
340        // RESUME KERNEL
341        b->SetInsertPoint(resumeKernel);
342        PHINode * phiArrayPtr = b->CreatePHI(array->getType(), 3);
343        phiArrayPtr->addIncoming(array, entry);
344        phiArrayPtr->addIncoming(initialArray, createNew);
345        phiArrayPtr->addIncoming(newArray, reallocExisting);
346
347        // NOTE: the 3 here is only to pass the assertion later. It refers to the number of elements in the carry data struct.
348        mCarryFrameStack.emplace_back(mCurrentFrame, 3);
349        mCurrentFrame = b->CreateGEP(phiArrayPtr, index);
350    }
351}
352
353/** ------------------------------------------------------------------------------------------------------------- *
354 * @brief leaveLoopBody
355 ** ------------------------------------------------------------------------------------------------------------- */
356void CarryManager::leaveLoopBody(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * /* exitBlock */) {
357
358    Type * const carryTy = b->getBitBlockType();
359
360    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
361
362        assert (mCarryInfo->hasSummary());
363
364        ConstantInt * const summaryIndex = b->getInt32(mCarryInfo->hasExplicitSummary() ? mCurrentFrameIndex : (mCurrentFrameIndex - 1));
365
366        Value * const carryInAccumulator = readCarryInSummary(b, summaryIndex);
367        Value * const carryOutAccumulator = mCarrySummaryStack.back();
368
369        if (mCarryInfo->hasExplicitSummary()) {
370            writeCarryOutSummary(b, carryOutAccumulator, summaryIndex);
371        }
372
373        std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
374        mCarryFrameStack.pop_back();
375
376        // In non-carry-collapsing mode, we cannot rely on the fact that performing a single iteration of this
377        // loop will consume all of the incoming carries from the prior block. We need to subtract the carries
378        // consumed by this iteration from our carry summary state. To do so in parallel, we use the the half-
379        // subtractor circuit to do it in ceil log2 steps. Similarly, we compute our carry out summary state
380        // (for the subsequent block to subtract) using a half-adder circuit.
381
382        // NOTE: this requires that, for all loop iterations, i, and all block iterations, j, the carry in
383        // summary, CI_i,j, matches the carry out summary of the prior block iteration, CO_i,j - 1.
384        // Otherwise we will end up with an incorrect result or being trapped in an infinite loop.
385
386        Value * capacityPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(0)});
387        Value * capacity = b->CreateLoad(capacityPtr, false);
388        Value * summaryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(2)});
389        Value * summary = b->CreateLoad(summaryPtr, false);
390
391        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
392
393        Value * loopSelector = b->CreateZExt(mLoopSelector, capacity->getType());
394
395        BasicBlock * entry = b->GetInsertBlock();
396        BasicBlock * update = b->CreateBasicBlock("UpdateNonCarryCollapsingSummary");
397        BasicBlock * resume = b->CreateBasicBlock("ResumeAfterUpdatingNonCarryCollapsingSummary");
398
399        b->CreateBr(update);
400
401        b->SetInsertPoint(update);
402        PHINode * i = b->CreatePHI(capacity->getType(), 2);
403        i->addIncoming(ConstantInt::getNullValue(capacity->getType()), entry);
404        PHINode * const borrow = b->CreatePHI(carryInAccumulator->getType(), 2);
405        borrow->addIncoming(carryInAccumulator, entry);
406        PHINode * const carry = b->CreatePHI(carryOutAccumulator->getType(), 2);
407        carry->addIncoming(carryOutAccumulator, entry);
408        // OR the updated carry in summary later for the summaryTest
409        PHINode * const carryInSummary = b->CreatePHI(carryTy, 2);
410        carryInSummary->addIncoming(Constant::getNullValue(carryTy), entry);
411
412        // half subtractor
413        Value * const carryInOffset = b->CreateOr(b->CreateShl(i, 1), loopSelector);
414        Value * const carryInPtr = b->CreateGEP(summary, carryInOffset);
415        Value * const carryIn = b->CreateBlockAlignedLoad(carryInPtr);
416        Value * const nextCarryIn = b->CreateXor(carryIn, borrow);
417        Value * const nextSummary = b->CreateOr(carryInSummary, nextCarryIn);
418
419        b->CreateBlockAlignedStore(nextCarryIn, carryInPtr);
420        carryInSummary->addIncoming(nextSummary, update);
421        Value * finalBorrow = b->CreateAnd(b->CreateNot(carryIn), borrow);
422        borrow->addIncoming(finalBorrow, update);
423
424        // half adder
425        Value * const carryOutOffset = b->CreateXor(carryInOffset, ConstantInt::get(carryInOffset->getType(), 1));
426        Value * const carryOutPtr = b->CreateGEP(summary, carryOutOffset);
427        Value * const carryOut = b->CreateBlockAlignedLoad(carryOutPtr);
428        Value * const nextCarryOut = b->CreateXor(carryOut, carry);
429
430        b->CreateBlockAlignedStore(nextCarryOut, carryOutPtr);
431        Value * finalCarry = b->CreateAnd(carryOut, carry);
432        carry->addIncoming(finalCarry, update);
433
434        // loop condition
435        i->addIncoming(b->CreateAdd(i, ONE), update);
436        b->CreateCondBr(b->CreateICmpNE(b->CreateShl(ONE, i), capacity), update, resume);
437
438        b->SetInsertPoint(resume);
439
440        b->CreateAssertZero(b->CreateOr(finalBorrow, finalCarry),
441                                   "CarryManager: loop post-condition violated: final borrow and carry must be zero!");
442
443        assert (!mLoopIndicies.empty());
444        PHINode * index = mLoopIndicies.back();
445        index->addIncoming(b->CreateAdd(index, b->getSize(1)), resume);
446        mLoopIndicies.pop_back();
447
448        mNextSummaryTest = nextSummary;
449    }
450    if (mCarryInfo->hasSummary()) {
451        const auto n = mCarrySummaryStack.size(); assert (n > 1);
452        Value * carryOut = mCarrySummaryStack.back();
453        mCarrySummaryStack.pop_back();
454        PHINode * phiCarryOut = cast<PHINode>(mCarrySummaryStack.back());
455        phiCarryOut->addIncoming(carryOut, b->GetInsertBlock());
456        // If we're returning to the base scope, reset our accumulated summary value.
457        if (n == 2) {
458            carryOut = Constant::getNullValue(carryTy);
459        }
460        mCarrySummaryStack.back() = carryOut;
461    }
462}
463
464/** ------------------------------------------------------------------------------------------------------------- *
465 * @brief leaveLoopScope
466 ** ------------------------------------------------------------------------------------------------------------- */
467void CarryManager::leaveLoopScope(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
468    assert (mLoopDepth > 0);
469    --mLoopDepth;
470    leaveScope(b);
471}
472
473/** ------------------------------------------------------------------------------------------------------------- *
474 * @brief enterIfScope
475 ** ------------------------------------------------------------------------------------------------------------- */
476void CarryManager::enterIfScope(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope) {
477    ++mIfDepth;
478    enterScope(b, scope);
479    // We zero-initialized the nested summary value and later OR in the current summary into the escaping summary
480    // so that upon processing the subsequent block iteration, we branch into this If scope iff a carry out was
481    // generated by a statement within this If scope and not by a dominating statement in the outer scope.
482    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
483        assert (mCurrentFrameIndex == 0);
484        mNextSummaryTest = readCarryInSummary(b, b->getInt32(0));
485        if (mCarryInfo->hasExplicitSummary()) {
486            mCurrentFrameIndex = 1;
487        }
488    }
489    Type * const carryTy = b->getBitBlockType();
490    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
491}
492
493/** ------------------------------------------------------------------------------------------------------------- *
494 * @brief generateSummaryTest
495 ** ------------------------------------------------------------------------------------------------------------- */
496Value * CarryManager::generateSummaryTest(const std::unique_ptr<kernel::KernelBuilder> & b, Value * condition) {
497    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
498        assert ("summary test was not generated" && mNextSummaryTest);
499        condition = b->simd_or(condition, mNextSummaryTest);
500        mNextSummaryTest = nullptr;
501    }
502    assert ("summary test was not consumed" && (mNextSummaryTest == nullptr));
503    return condition;
504}
505
506/** ------------------------------------------------------------------------------------------------------------- *
507 * @brief enterIfBody
508 ** ------------------------------------------------------------------------------------------------------------- */
509void CarryManager::enterIfBody(const std::unique_ptr<kernel::KernelBuilder> & /* b */, BasicBlock * const entryBlock) {
510    assert (entryBlock);
511}
512
513/** ------------------------------------------------------------------------------------------------------------- *
514 * @brief leaveIfBody
515 ** ------------------------------------------------------------------------------------------------------------- */
516void CarryManager::leaveIfBody(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const exitBlock) {
517    assert (exitBlock);
518    const auto n = mCarrySummaryStack.size();
519    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
520        writeCarryOutSummary(b, mCarrySummaryStack[n - 1], b->getInt32(0));
521    }
522    if (n > 2) {
523        mCarrySummaryStack[n - 1] = b->CreateOr(mCarrySummaryStack[n - 1], mCarrySummaryStack[n - 2], "summary");
524    }
525}
526
527/** ------------------------------------------------------------------------------------------------------------- *
528 * @brief leaveIfScope
529 ** ------------------------------------------------------------------------------------------------------------- */
530void CarryManager::leaveIfScope(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
531    assert (mIfDepth > 0);
532    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
533        const auto n = mCarrySummaryStack.size(); assert (n > 0);
534        if (n > 2) {
535            // When leaving a nested If scope with a summary value, phi out the summary to ensure the
536            // appropriate summary is stored in the outer scope.
537            Value * nested = mCarrySummaryStack[n - 1];
538            Value * outer = mCarrySummaryStack[n - 2];
539            assert (nested->getType() == outer->getType());
540            PHINode * const phi = b->CreatePHI(nested->getType(), 2, "summary");
541            phi->addIncoming(outer, entryBlock);
542            phi->addIncoming(nested, exitBlock);
543            mCarrySummaryStack[n - 2] = phi;
544        }
545    }
546    --mIfDepth;
547    leaveScope(b);
548    mCarrySummaryStack.pop_back();
549}
550
551/** ------------------------------------------------------------------------------------------------------------ *
552 * @brief enterScope
553 ** ------------------------------------------------------------------------------------------------------------- */
554void CarryManager::enterScope(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope) {
555    assert (scope);
556    // Store the state of the current frame and update the scope state
557    mCarryFrameStack.emplace_back(mCurrentFrame, mCurrentFrameIndex + 1);
558    mCurrentScope = scope;
559    mCarryScopeIndex.push_back(++mCarryScopes);
560    mCarryInfo = &mCarryMetadata[mCarryScopes];
561    // Check whether we're still within our struct bounds; if this fails, either the Pablo program changed during
562    // compilation or a memory corruption has occured.
563    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
564    mCurrentFrame = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex)});
565    // Verify we're pointing to a carry frame struct
566    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
567    mCurrentFrameIndex = 0;
568}
569
570/** ------------------------------------------------------------------------------------------------------------- *
571 * @brief leaveScope
572 ** ------------------------------------------------------------------------------------------------------------- */
573void CarryManager::leaveScope(const std::unique_ptr<kernel::KernelBuilder> & /* b */) {
574
575    // Did we use all of the packs in this carry struct?
576    assert (mCurrentFrameIndex == mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
577    // Sanity test: are there remaining carry frames?
578    assert (!mCarryFrameStack.empty());
579
580    std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
581
582    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
583
584    mCarryFrameStack.pop_back();
585    mCarryScopeIndex.pop_back();
586    assert (!mCarryScopeIndex.empty());
587    mCurrentScope = mCurrentScope->getPredecessor();
588    assert (mCurrentScope);
589    mCarryInfo = &mCarryMetadata[mCarryScopeIndex.back()];
590}
591
592/** ------------------------------------------------------------------------------------------------------------- *
593 * @brief addCarryInCarryOut
594 ** ------------------------------------------------------------------------------------------------------------- */
595Value * CarryManager::addCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const Statement * const operation, Value * const e1, Value * const e2) {
596    assert (operation && (isNonAdvanceCarryGeneratingStatement(operation)));
597    Value * const carryIn = getNextCarryIn(b);
598    Value * carryOut, * result;
599    std::tie(carryOut, result) = b->bitblock_add_with_carry(e1, e2, carryIn);
600    setNextCarryOut(b, carryOut);
601    assert (result->getType() == b->getBitBlockType());
602    return result;
603}
604
605/** ------------------------------------------------------------------------------------------------------------- *
606 * @brief advanceCarryInCarryOut
607 ** ------------------------------------------------------------------------------------------------------------- */
608Value * CarryManager::advanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const Advance * const advance, Value * const value) {
609    const auto shiftAmount = advance->getAmount();
610    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
611        Value * const carryIn = getNextCarryIn(b);
612        Value * carryOut, * result;
613        std::tie(carryOut, result) = b->bitblock_advance(value, carryIn, shiftAmount);
614        setNextCarryOut(b, carryOut);
615        assert (result->getType() == b->getBitBlockType());
616        return result;
617    } else {
618        return longAdvanceCarryInCarryOut(b, value, shiftAmount);
619    }
620}
621
622/** ------------------------------------------------------------------------------------------------------------- *
623 * @brief indexedAdvanceCarryInCarryOut
624 ** ------------------------------------------------------------------------------------------------------------- */
625Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
626    const auto shiftAmount = advance->getAmount();
627    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
628        Value * const carryIn = getNextCarryIn(b);
629        Value * carryOut, * result;
630        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
631        setNextCarryOut(b, carryOut);
632        return result;
633    } else if (shiftAmount <= b->getBitBlockWidth()) {
634        Value * carryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
635        Value * carryIn = b->CreateBlockAlignedLoad(carryPtr);
636        Value * carryOut, * result;
637        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
638        b->CreateBlockAlignedStore(carryOut, carryPtr);
639        if ((mIfDepth > 0) && mCarryInfo->hasExplicitSummary()) {
640            addToCarryOutSummary(b, strm);
641        }
642        return result;
643    } else {
644        unsigned summaryFrame = mCurrentFrameIndex;
645        if (mIfDepth > 0) {
646            // Skip over summary frame to perform the long indexed advance.
647            mCurrentFrameIndex++;
648        }
649        Type * iBitBlock = b->getIntNTy(b->getBitBlockWidth());
650        Constant * blockWidth = b->getSize(b->getBitBlockWidth());
651        Constant * blockWidth_1 = b->getSize(b->getBitBlockWidth() - 1);
652        Value * carryPosition = b->getScalarField("IndexedAdvancePosition" + std::to_string(mIndexedLongAdvanceIndex));
653        Value * carryBlockEndPos = b->CreateAdd(carryPosition, blockWidth_1);
654        unsigned carry_blocks = nearest_pow2(1+ceil_udiv(shiftAmount, b->getBitBlockWidth()));
655        Constant * carryQueueBlocks = b->getSize(carry_blocks);
656        Value * carryBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryPosition, blockWidth), carryQueueBlocks), b->getInt32Ty());
657        Value * carryEndBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryBlockEndPos, blockWidth), carryQueueBlocks), b->getInt32Ty());
658        Value * lo_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryBlock});
659        Value * hi_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryEndBlock});
660        Value * c_lo = b->CreateBitCast(b->CreateBlockAlignedLoad(lo_GEP), iBitBlock);
661        Value * c_hi = b->CreateBitCast(b->CreateBlockAlignedLoad(hi_GEP), iBitBlock);
662        Value * lo_shift = b->CreateZExt(b->CreateURem(carryPosition, blockWidth), iBitBlock);
663        Value * hi_shift = b->CreateZExt(b->CreateSub(blockWidth_1, b->CreateURem(carryBlockEndPos, blockWidth)), iBitBlock);
664        Value * carryIn = b->CreateOr(b->CreateLShr(c_lo, lo_shift), b->CreateShl(c_hi, hi_shift));
665        Value * carryOut, * result;
666        std::tie(carryOut, result) = b->bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount);
667        carryOut = b->CreateBitCast(carryOut, iBitBlock);
668        Value * adv = b->mvmd_extract(sizeof(size_t) * 8, b->simd_popcount(b->getBitBlockWidth(), index_strm), 0);
669        b->setScalarField("IndexedAdvancePosition" + std::to_string(mIndexedLongAdvanceIndex), b->CreateAdd(carryPosition, adv));
670        Value * carryOutPosition = b->CreateAdd(carryPosition, b->getSize(shiftAmount));
671        Value * carryOutEndPos = b->CreateAdd(carryOutPosition, blockWidth_1);
672        carryBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryOutPosition, blockWidth), carryQueueBlocks), b->getInt32Ty());
673        carryEndBlock = b->CreateTrunc(b->CreateURem(b->CreateUDiv(carryOutEndPos, blockWidth), carryQueueBlocks), b->getInt32Ty());
674        lo_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryBlock});
675        hi_GEP = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), carryEndBlock});
676        lo_shift = b->CreateZExt(b->CreateURem(carryOutPosition, blockWidth), iBitBlock);
677        hi_shift = b->CreateZExt(b->CreateSub(blockWidth_1, b->CreateURem(carryOutEndPos, blockWidth)), iBitBlock);
678        c_lo = b->CreateOr(b->CreateBitCast(b->CreateBlockAlignedLoad(lo_GEP), iBitBlock), b->CreateShl(carryOut, lo_shift));
679        c_hi = b->CreateLShr(carryOut, hi_shift);
680        b->CreateBlockAlignedStore(b->CreateBitCast(c_lo, b->getBitBlockType()), lo_GEP);
681        b->CreateBlockAlignedStore(b->CreateBitCast(c_hi, b->getBitBlockType()), hi_GEP);
682        mIndexedLongAdvanceIndex++;
683        mCurrentFrameIndex++;
684        // Now handle the summary.
685        if (mIfDepth > 0) {
686            const auto summaryBlocks = ceil_udiv(shiftAmount, b->getBitBlockWidth());
687            const auto summarySize = ceil_udiv(summaryBlocks, b->getBitBlockWidth());
688            for (unsigned i = 0; i < summarySize; i++) {
689                // All ones summary for now.
690                b->CreateBlockAlignedStore(b->allOnes(), b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(summaryFrame), b->getInt32(i)}));
691            }
692        }
693        return result;
694    }
695}
696
697
698/** ------------------------------------------------------------------------------------------------------------- *
699 * @brief longAdvanceCarryInCarryOut
700 ** ------------------------------------------------------------------------------------------------------------- */
701inline Value * CarryManager::longAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const value, const unsigned shiftAmount) {
702
703    assert (mHasLongAdvance);
704    assert (shiftAmount >= LONG_ADVANCE_BREAKPOINT);
705    assert (value);
706
707    const auto blockWidth = b->getBitBlockWidth();
708    Type * const streamTy = b->getIntNTy(blockWidth);
709
710    Value * indices[3];
711
712    indices[0] = b->getInt32(0);
713
714    if (mIfDepth > 0) {
715        if (shiftAmount > blockWidth) {
716
717            // TODO: once CEILING(shiftAmount / 256) > 2, consider using a half-adder/subtractor strategy?
718
719            Value * carry = b->CreateZExt(b->bitblock_any(value), streamTy);
720            const auto summaryBlocks = ceil_udiv(shiftAmount, blockWidth);
721            const auto summarySize = ceil_udiv(summaryBlocks, blockWidth);
722            VectorType * const bitBlockTy = b->getBitBlockType();
723            IntegerType * const laneTy = cast<IntegerType>(bitBlockTy->getVectorElementType());
724            const auto laneWidth = laneTy->getIntegerBitWidth();
725
726            assert (summarySize > 0);
727            assert (is_power_2(laneWidth));
728
729            indices[1] = b->getInt32(mCurrentFrameIndex++);
730
731            for (unsigned i = 1;;++i) {
732
733                assert (i <= summarySize);
734
735                indices[2] = b->getInt32(i - 1);
736
737                Value * const ptr = b->CreateGEP(mCurrentFrame, indices);
738                Value * const prior = b->CreateBitCast(b->CreateBlockAlignedLoad(ptr), streamTy);
739
740                Value * advanced = nullptr;
741                if (LLVM_LIKELY(summaryBlocks < laneWidth)) {
742                    advanced = b->CreateOr(b->CreateShl(prior, 1), carry);
743                    carry = b->CreateLShr(prior, summaryBlocks - 1);
744                } else {
745                    std::tie(advanced, carry) = b->bitblock_advance(prior, carry, 1);
746                }
747                Value * stream = b->CreateBitCast(advanced, bitBlockTy);
748                if (LLVM_LIKELY(i == summarySize)) {
749                    const auto n = bitBlockTy->getVectorNumElements();
750                    Constant * mask[n];                                       
751                    const auto m = udiv(summaryBlocks, laneWidth);
752                    if (m) {
753                        std::fill_n(mask, m, ConstantInt::getAllOnesValue(laneTy));
754                    }
755                    mask[m] = ConstantInt::get(laneTy, (1UL << (summaryBlocks & (laneWidth - 1))) - 1UL);
756                    if (n > m) {
757                        std::fill_n(mask + m + 1, n - m, UndefValue::get(laneTy));
758                    }
759                    stream = b->CreateAnd(stream, ConstantVector::get(ArrayRef<Constant *>(mask, n)));
760                    addToCarryOutSummary(b, stream);
761                    b->CreateBlockAlignedStore(stream, ptr);
762                    break;
763                }
764                addToCarryOutSummary(b, stream);
765                b->CreateBlockAlignedStore(stream, ptr);
766            }
767
768        } else if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
769            addToCarryOutSummary(b, value);
770        }
771    }
772
773    indices[1] = b->getInt32(mCurrentFrameIndex++);
774
775    // special case using a single buffer entry and the carry_out value.
776    if (LLVM_LIKELY((shiftAmount < blockWidth) && (mLoopDepth == 0))) {
777
778        indices[2] = indices[0]; // b->getInt32(0)
779        assert (cast<ConstantInt>(indices[2])->isNullValue());
780
781        Value * const buffer = b->CreateGEP(mCurrentFrame, indices);
782        assert (buffer->getType()->getPointerElementType() == b->getBitBlockType());
783        Value * carryIn = b->CreateBlockAlignedLoad(buffer);
784
785        b->CreateBlockAlignedStore(value, buffer);
786        /* Very special case - no combine */
787        if (LLVM_UNLIKELY(shiftAmount == blockWidth)) {
788            return b->CreateBitCast(carryIn, b->getBitBlockType());
789        }
790        Value* block0_shr = b->CreateLShr(b->CreateBitCast(carryIn, streamTy), blockWidth - shiftAmount);
791        Value* block1_shl = b->CreateShl(b->CreateBitCast(value, streamTy), shiftAmount);
792        return b->CreateBitCast(b->CreateOr(block1_shl, block0_shr), b->getBitBlockType());
793    } else { //
794        const unsigned blockShift = shiftAmount & (blockWidth - 1);
795        const unsigned summaryBlocks = ceil_udiv(shiftAmount, blockWidth);
796
797        // Create a mask to implement circular buffer indexing
798        Value * indexMask = b->getSize(nearest_pow2(summaryBlocks) - 1);
799        Value * blockIndex = b->getScalarField("CarryBlockIndex");
800
801        Value * carryIndex0 = b->CreateSub(blockIndex, b->getSize(summaryBlocks));
802        indices[2] = b->CreateAnd(carryIndex0, indexMask);
803        Value * const carryInPtr = b->CreateGEP(mCurrentFrame, indices);
804        Value * carryIn = b->CreateBlockAlignedLoad(carryInPtr);
805
806        indices[2] = b->CreateAnd(blockIndex, indexMask);
807        Value * const carryOutPtr = b->CreateGEP(mCurrentFrame, indices);
808        assert (carryIn->getType() == b->getBitBlockType());
809
810        // If the long advance is an exact multiple of BitBlockWidth, we simply return the oldest
811        // block in the long advance carry data area.
812        if (LLVM_UNLIKELY(blockShift == 0)) {
813            b->CreateBlockAlignedStore(value, carryOutPtr);
814            return carryIn;
815        } else { // Otherwise we need to combine data from the two oldest blocks.
816            Value * const carryIndex1 = b->CreateSub(blockIndex, b->getSize(summaryBlocks - 1));
817            indices[2] = b->CreateAnd(carryIndex1, indexMask);
818
819            Value * const carryInPtr2 = b->CreateGEP(mCurrentFrame, indices);
820            Value * const carryIn2 = b->CreateBlockAlignedLoad(carryInPtr2);
821            assert (carryOutPtr->getType()->getPointerElementType() == value->getType());
822            b->CreateBlockAlignedStore(value, carryOutPtr);
823
824            Value * const b0 = b->CreateLShr(b->CreateBitCast(carryIn, streamTy), blockWidth - blockShift);
825            Value * const b1 = b->CreateShl(b->CreateBitCast(carryIn2, streamTy), blockShift);
826            return b->CreateBitCast(b->CreateOr(b1, b0), b->getBitBlockType());
827        }
828    }
829}
830
831/** ------------------------------------------------------------------------------------------------------------- *
832 * @brief getNextCarryIn
833 ** ------------------------------------------------------------------------------------------------------------- */
834Value * CarryManager::getNextCarryIn(const std::unique_ptr<kernel::KernelBuilder> & b) {
835    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
836    if (mLoopDepth == 0) {
837        mCarryPackPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex)});
838    } else {
839        mCarryPackPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), mLoopSelector});
840    }
841    Type * const carryTy = b->getBitBlockType();
842    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
843    Value * const carryIn = b->CreateBlockAlignedLoad(mCarryPackPtr);
844    if (mLoopDepth > 0) {
845        b->CreateBlockAlignedStore(Constant::getNullValue(carryTy), mCarryPackPtr);
846    }
847    return carryIn;
848}
849
850/** ------------------------------------------------------------------------------------------------------------- *
851 * @brief setNextCarryOut
852 ** ------------------------------------------------------------------------------------------------------------- */
853void CarryManager::setNextCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, Value * carryOut) {
854    Type * const carryTy = b->getBitBlockType();
855    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
856    carryOut = b->CreateBitCast(carryOut, carryTy);
857    if (mCarryInfo->hasSummary()) {
858        addToCarryOutSummary(b, carryOut);
859    }
860    if (mLoopDepth != 0) {
861        mCarryPackPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), mNextLoopSelector});
862        if (LLVM_LIKELY(!mCarryInfo->nonCarryCollapsingMode())) {
863            Value * accum = b->CreateBlockAlignedLoad(mCarryPackPtr);
864            carryOut = b->CreateOr(carryOut, accum);
865        }
866    }
867    ++mCurrentFrameIndex;
868    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
869    b->CreateBlockAlignedStore(carryOut, mCarryPackPtr);
870}
871
872/** ------------------------------------------------------------------------------------------------------------- *
873 * @brief readCarryInSummary
874 ** ------------------------------------------------------------------------------------------------------------- */
875Value * CarryManager::readCarryInSummary(const std::unique_ptr<kernel::KernelBuilder> & b, ConstantInt * index) const {
876    assert (mCarryInfo->hasSummary());
877    unsigned count = 2;
878    if (LLVM_UNLIKELY(mCarryInfo->hasBorrowedSummary())) {
879        Type * frameTy = mCurrentFrame->getType()->getPointerElementType();
880        count = 1;
881        while (frameTy->isStructTy()) {
882            ++count;
883            assert (frameTy->getStructNumElements() > 0);
884            frameTy = frameTy->getStructElementType(0);
885        }
886    }
887    const unsigned length = (mLoopDepth == 0) ? count : (count + 1);
888    Value * indicies[length];
889    std::fill(indicies, indicies + count - 1, b->getInt32(0));
890    indicies[count - 1] = index;
891    if (mLoopDepth != 0) {
892        indicies[count] = mLoopSelector;
893    }
894
895    ArrayRef<Value *> ar(indicies, length);
896    Value * const ptr = b->CreateGEP(mCurrentFrame, ar);
897    Value * const summary = b->CreateBlockAlignedLoad(ptr);
898    if (mLoopDepth != 0 && mCarryInfo->hasExplicitSummary()) {
899        Type * const carryTy = b->getBitBlockType();
900        b->CreateBlockAlignedStore(Constant::getNullValue(carryTy), ptr);
901    }
902    return summary;
903}
904
905/** ------------------------------------------------------------------------------------------------------------- *
906 * @brief writeCarryOutSummary
907 ** ------------------------------------------------------------------------------------------------------------- */
908inline void CarryManager::writeCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const summary, ConstantInt * index) const {
909    Value * ptr = nullptr;
910    assert (mCarryInfo->hasExplicitSummary());
911    if (mLoopDepth > 0) {
912        ptr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), index, mNextLoopSelector});
913    } else {
914        ptr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), index});
915    }
916    b->CreateBlockAlignedStore(summary, ptr);
917}
918
919/** ------------------------------------------------------------------------------------------------------------- *
920 * @brief addToCarryOutSummary
921 ** ------------------------------------------------------------------------------------------------------------- */
922inline void CarryManager::addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const value) {
923    assert ("cannot add null summary value!" && value);   
924    assert ("summary stack is empty!" && !mCarrySummaryStack.empty());
925    assert (mCarryInfo->hasSummary());
926    mCarrySummaryStack.back() = b->CreateOr(value, mCarrySummaryStack.back());
927}
928
929/** ------------------------------------------------------------------------------------------------------------- *
930 * @brief enumerate
931 ** ------------------------------------------------------------------------------------------------------------- */
932unsigned CarryManager::getScopeCount(const PabloBlock * const scope, unsigned index) {
933    for (const Statement * stmt : *scope) {
934        if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
935            index = getScopeCount(cast<Branch>(stmt)->getBody(), index);
936        }
937    }
938    return index + 1;
939}
940
941/** ------------------------------------------------------------------------------------------------------------- *
942 * @brief hasIterationSpecificAssignment
943 ** ------------------------------------------------------------------------------------------------------------- */
944bool isNonRegularLanguage(const PabloBlock * const scope) {
945    if (const Branch * br = scope->getBranch()) {
946        return !br->isRegular();
947    }
948    return false;
949}
950
951static bool hasNonEmptyCarryStruct(const Type * const frameTy) {
952    if (frameTy->isStructTy()) {
953        for (unsigned i = 0; i < frameTy->getStructNumElements(); ++i) {
954            if (hasNonEmptyCarryStruct(frameTy->getStructElementType(i))) {
955                return true;
956            }
957        }
958        return false;
959    }
960    return true;
961}
962
963static bool hasNonEmptyCarryStruct(const std::vector<Type *> & state) {
964    for (const Type * const frameTy : state) {
965        if (hasNonEmptyCarryStruct(frameTy)) {
966            return true;
967        }
968    }
969    return false;
970}
971
972/** ------------------------------------------------------------------------------------------------------------- *
973 * @brief analyse
974 ** ------------------------------------------------------------------------------------------------------------- */
975StructType * CarryManager::analyse(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope,
976                                   const unsigned ifDepth, const unsigned loopDepth, const bool isNestedWithinNonCarryCollapsingLoop) {
977    assert ("scope cannot be null!" && scope);
978    assert ("entry scope (and only the entry scope) must be in scope 0"
979            && (mCarryScopes == 0 ? (scope == mKernel->getEntryScope()) : (scope != mKernel->getEntryScope())));
980    assert (mCarryScopes < mCarryMetadata.size());
981    Type * const carryTy = b->getBitBlockType();
982    Type * const blockTy = b->getBitBlockType();
983
984    const unsigned carryScopeIndex = mCarryScopes++;
985    const bool nonCarryCollapsingMode = isNonRegularLanguage(scope);
986    Type * const carryPackType = (loopDepth == 0) ? carryTy : ArrayType::get(carryTy, 2);
987    std::vector<Type *> state;
988    for (const Statement * stmt : *scope) {
989        if (LLVM_UNLIKELY(isa<Advance>(stmt) || isa<IndexedAdvance>(stmt))) {
990            int64_t amount;
991            if (isa<Advance>(stmt)) amount = cast<Advance>(stmt)->getAmount();
992            else amount = cast<IndexedAdvance>(stmt)->getAmount();
993            Type * type = carryPackType;
994            if (LLVM_UNLIKELY(amount >= LONG_ADVANCE_BREAKPOINT)) {
995                const auto blockWidth = b->getBitBlockWidth();
996                const auto blocks = ceil_udiv(amount, blockWidth);
997                type = ArrayType::get(blockTy, nearest_pow2(blocks + (isa<IndexedAdvance>(stmt) ? 1:0) + ((loopDepth != 0) ? 1 : 0)));
998                if (LLVM_UNLIKELY(ifDepth > 0 && blocks != 1)) {
999                    const auto summarySize = ceil_udiv(blocks, blockWidth);
1000                    // 1 bit will mark the presense of any bit in each block.
1001                    state.push_back(ArrayType::get(blockTy, summarySize));
1002                }
1003                mHasLongAdvance = true;
1004                if (isa<IndexedAdvance>(stmt)) mIndexedLongAdvanceTotal++;
1005            }
1006            state.push_back(type);
1007        } else if (LLVM_UNLIKELY(isNonAdvanceCarryGeneratingStatement(stmt))) {
1008            state.push_back(carryPackType);
1009        } else if (LLVM_UNLIKELY(isa<If>(stmt))) {
1010            state.push_back(analyse(b, cast<If>(stmt)->getBody(), ifDepth + 1, loopDepth, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
1011        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
1012            mHasLoop = true;
1013            state.push_back(analyse(b, cast<While>(stmt)->getBody(), ifDepth, loopDepth + 1, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
1014        }
1015    }
1016    // Build the carry state struct and add the summary pack if needed.
1017    CarryData & cd = mCarryMetadata[carryScopeIndex];
1018    StructType * carryState = nullptr;
1019    CarryData::SummaryType summaryType = CarryData::NoSummary;
1020    if (LLVM_UNLIKELY(state.empty())) {
1021        carryState = StructType::get(b->getContext());
1022    } else {
1023        // do we have a summary or a sequence of nested empty structs?
1024        if (hasNonEmptyCarryStruct(state)) {
1025            if (dyn_cast_or_null<If>(scope->getBranch()) || nonCarryCollapsingMode || isNestedWithinNonCarryCollapsingLoop) {
1026                if (LLVM_LIKELY(state.size() > 1)) {
1027                    summaryType = CarryData::ExplicitSummary;
1028                    // NOTE: summaries are stored differently depending whether we're entering an If or While branch. With an If branch, they
1029                    // preceed the carry state data and with a While loop they succeed it. This is to help cache prefectching performance.
1030                    state.insert(isa<If>(scope->getBranch()) ? state.begin() : state.end(), carryPackType);
1031                } else {
1032                    summaryType = CarryData::ImplicitSummary;
1033                    if (hasNonEmptyCarryStruct(state[0])) {
1034                        summaryType = CarryData::BorrowedSummary;
1035                    }
1036                }
1037            }
1038        }
1039        carryState = StructType::get(b->getContext(), state);
1040        // If we're in a loop and cannot use collapsing carry mode, convert the carry state struct into a capacity,
1041        // carry state pointer, and summary pointer struct.
1042        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
1043            mHasNonCarryCollapsingLoops = true;
1044            carryState = StructType::get(b->getContext(), {b->getSizeTy(), carryState->getPointerTo(), carryTy->getPointerTo()});
1045            assert (isDynamicallyAllocatedType(carryState));
1046        }
1047        cd.setNonCollapsingCarryMode(nonCarryCollapsingMode);
1048    }
1049    cd.setSummaryType(summaryType);
1050    return carryState;
1051}
1052
1053/** ------------------------------------------------------------------------------------------------------------- *
1054 * @brief constructor
1055 ** ------------------------------------------------------------------------------------------------------------- */
1056CarryManager::CarryManager() noexcept
1057: mKernel(nullptr)
1058, mCurrentFrame(nullptr)
1059, mCurrentFrameIndex(0)
1060, mCurrentScope(nullptr)
1061, mCarryInfo(nullptr)
1062, mNextSummaryTest(nullptr)
1063, mIfDepth(0)
1064, mHasLongAdvance(false)
1065, mIndexedLongAdvanceTotal(0)
1066, mIndexedLongAdvanceIndex(0)
1067, mHasNonCarryCollapsingLoops(false)
1068, mHasLoop(false)
1069, mLoopDepth(0)
1070, mLoopSelector(nullptr)
1071, mNextLoopSelector(nullptr)
1072, mCarryPackPtr(nullptr)
1073, mCarryScopes(0) {
1074
1075}
1076
1077}
Note: See TracBrowser for help on using the repository browser.