source: icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp @ 5431

Last change on this file since 5431 was 5431, checked in by nmedfort, 2 years ago

partial refactoring check in with change for Linda.

File size: 43.7 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "carry_manager.h"
8#include <pablo/carry_data.h>
9#include <pablo/codegenstate.h>
10#include <llvm/IR/BasicBlock.h>
11#include <IR_Gen/idisa_builder.h>
12#include <llvm/IR/DerivedTypes.h>
13#include <pablo/branch.h>
14#include <pablo/pe_advance.h>
15#include <pablo/pe_scanthru.h>
16#include <pablo/pe_matchstar.h>
17#include <pablo/pe_var.h>
18
19#include <llvm/Support/raw_ostream.h>
20
21using namespace llvm;
22
23namespace pablo {
24
25inline static unsigned ceil_log2(const unsigned v) {
26    assert ("log2(0) is undefined!" && v != 0);
27    return 32 - __builtin_clz(v - 1);
28}
29
30inline static unsigned floor_log2(const unsigned v) {
31    assert ("log2(0) is undefined!" && v != 0);
32    return 31 - __builtin_clz(v);
33}
34
35inline static unsigned nearest_pow2(const unsigned v) {
36    assert(v > 0 && v < (UINT32_MAX / 2));
37    return (v < 2) ? 1 : (1 << ceil_log2(v));
38}
39
40inline static unsigned ceil_udiv(const unsigned x, const unsigned y) {
41    return (((x - 1) | (y - 1)) + 1) / y;
42}
43
44using TypeId = PabloAST::ClassTypeId;
45
46inline static bool isNonAdvanceCarryGeneratingStatement(const Statement * const stmt) {
47    switch (stmt->getClassTypeId()) {
48        case TypeId::ScanThru:
49        case TypeId::AdvanceThenScanThru:
50        case TypeId::ScanTo:
51        case TypeId::AdvanceThenScanTo:
52        case TypeId::MatchStar:
53            return true;
54        default:
55            return false;
56    }
57}
58
59#define LONG_ADVANCE_BREAKPOINT 64
60
61/** ------------------------------------------------------------------------------------------------------------- *
62 * @brief initializeCarryData
63 ** ------------------------------------------------------------------------------------------------------------- */
64void CarryManager::initializeCarryData() {
65
66    // Each scope constructs its own CarryData struct, which will be added to the final "carries" struct
67    // that is added to the Kernel. The scope index will indicate which struct to access.
68
69    // A CarryData struct either contains an array of CarryPackBlocks or an integer indicating the capacity of
70    // the variable length CarryData struct and pointer. A variable length CarryData struct is required whenever
71    // the streams accessed by a loop could vary between iterations. When resizing a CarryData struct for a
72    // particular loop, the current loop struct and all nested structs need to be resized. This accommodates
73    // the fact every pablo While loop must be executed at least once.
74
75    // A nested loop may also contain a variable length CarryData struct
76
77    // To determine whether we require a variable length CarryData struct, we test the escaped variables of
78    // each loop branch to see whether they are used as the index parameter of a nested Extract statement.
79    // Any scope that requires variable length CarryData, requires that all nested branches have a unique
80    // set of carries for that iteration.
81
82    iBuilder = mKernel->getBuilder();
83    assert ("CarryManager does not have a IDISA builder" && iBuilder);
84
85    PabloBlock * const entryBlock = mKernel->getEntryBlock();
86
87    mCurrentScope = entryBlock;
88
89    mCarryScopes = 0;
90
91    mCarryMetadata.resize(getScopeCount(entryBlock));
92
93    Type * const carryStateTy = analyse(entryBlock);
94
95    mKernel->addScalar(carryStateTy, "carries");
96
97    if (mHasLoop) {
98        mKernel->addScalar(iBuilder->getInt32Ty(), "selector");
99    }
100    if (mHasLongAdvance) {
101        mKernel->addScalar(iBuilder->getSizeTy(), "CarryBlockIndex");
102    }
103}
104
105/** ------------------------------------------------------------------------------------------------------------- *
106 * @brief initializeCodeGen
107 ** ------------------------------------------------------------------------------------------------------------- */
108void CarryManager::initializeCodeGen() {
109
110    assert(!mCarryMetadata.empty());
111    mCarryInfo = &mCarryMetadata[0];
112    assert (!mCarryInfo->hasSummary());
113
114    iBuilder = mKernel->getBuilder();
115    assert ("CarryManager does not have a IDISA builder" && iBuilder);
116
117    mCurrentFrame = mKernel->getScalarFieldPtr("carries");
118    mCurrentFrameIndex = 0;
119    mCarryScopes = 0;
120    mCarryScopeIndex.push_back(0);
121
122    assert (mCarryFrameStack.empty());
123
124    assert (mCarrySummaryStack.empty());
125    mCarrySummaryStack.push_back(Constant::getNullValue(getCarryPackType()));
126
127    if (mHasLoop) {       
128        mLoopSelector = mKernel->getScalarField("selector");
129        mNextLoopSelector = iBuilder->CreateXor(mLoopSelector, ConstantInt::get(mLoopSelector->getType(), 1));
130    }
131
132}
133
134/** ------------------------------------------------------------------------------------------------------------- *
135 * @brief finalizeCodeGen
136 ** ------------------------------------------------------------------------------------------------------------- */
137void CarryManager::finalizeCodeGen() {
138    if (mHasLoop) {
139        mKernel->setScalarField("selector", mNextLoopSelector);
140    }
141    if (mHasLongAdvance) {
142        Value * idx = mKernel->getScalarField("CarryBlockIndex");
143        idx = iBuilder->CreateAdd(idx, iBuilder->getSize(1));
144        mKernel->setScalarField("CarryBlockIndex", idx);
145    }
146    assert (mCarryFrameStack.empty());   
147    assert ("base summary value was deleted!" && mCarrySummaryStack.size() == 1);
148    assert ("base summary value was overwritten with non-zero value!" && isa<Constant>(mCarrySummaryStack[0]) && cast<Constant>(mCarrySummaryStack[0])->isNullValue());
149    mCarrySummaryStack.clear();
150    assert (mCarryScopeIndex.size() == 1);
151    mCarryScopeIndex.clear();
152}
153
154/** ------------------------------------------------------------------------------------------------------------- *
155 * @brief enterLoopScope
156 ** ------------------------------------------------------------------------------------------------------------- */
157void CarryManager::enterLoopScope(const PabloBlock * const scope) {
158    assert (scope);
159    assert (mHasLoop);
160    ++mLoopDepth;
161    enterScope(scope);
162}
163
164/** ------------------------------------------------------------------------------------------------------------- *
165 * @brief enterLoopBody
166 ** ------------------------------------------------------------------------------------------------------------- */
167void CarryManager::enterLoopBody(BasicBlock * const entryBlock) {
168    if (mCarryInfo->hasSummary()) {
169        PHINode * phiCarryOutSummary = iBuilder->CreatePHI(getCarryPackType(), 2, "summary");
170        assert (!mCarrySummaryStack.empty());
171        phiCarryOutSummary->addIncoming(mCarrySummaryStack.back(), entryBlock);
172        // Replace the incoming carry summary with the phi node and add the phi node to the stack  so that we can
173        // properly OR it into the outgoing summary value.
174        // NOTE: this may change the base summary value; when exiting to the base scope, replace this summary with
175        // a null value to prevent subsequent nested scopes from inheriting the summary of this scope.
176        mCarrySummaryStack.back() = phiCarryOutSummary;
177        mCarrySummaryStack.push_back(phiCarryOutSummary);
178    }
179    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
180
181        assert (mCarryInfo->hasSummary());
182
183        // Check whether we need to resize the carry state
184        PHINode * index = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
185        mLoopIndicies.push_back(index);
186        index->addIncoming(iBuilder->getSize(0), entryBlock);
187
188        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
189        Value * capacity = iBuilder->CreateLoad(capacityPtr, false, "capacity");
190        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
191        Value * arrayPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
192        Value * array = iBuilder->CreateLoad(arrayPtr, false, "array");
193
194        BasicBlock * const entry = iBuilder->GetInsertBlock();
195        BasicBlock * const resizeCarryState = mKernel->CreateBasicBlock("ResizeCarryState");
196        BasicBlock * const reallocExisting = mKernel->CreateBasicBlock("ReallocExisting");
197        BasicBlock * const createNew = mKernel->CreateBasicBlock("CreateNew");
198        BasicBlock * const resumeKernel = mKernel->CreateBasicBlock("ResumeKernel");
199
200        iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(index, capacity), resumeKernel, resizeCarryState);
201
202        // RESIZE CARRY BLOCK
203        iBuilder->SetInsertPoint(resizeCarryState);
204        const auto BlockWidth = getCarryPackType()->getPrimitiveSizeInBits() / 8;
205        const auto Log2BlockWidth = floor_log2(BlockWidth);
206        Constant * const carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(array->getType()->getPointerElementType()), iBuilder->getSizeTy(), false);
207        Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
208
209        Value * const hasCarryState = iBuilder->CreateICmpNE(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
210
211        iBuilder->CreateLikelyCondBr(hasCarryState, reallocExisting, createNew);
212
213        // REALLOCATE EXISTING
214        iBuilder->SetInsertPoint(reallocExisting);
215
216        Value * const capacitySize = iBuilder->CreateMul(capacity, carryStateWidth);
217        Value * const newCapacitySize = iBuilder->CreateShl(capacitySize, 1); // x 2
218
219
220        Value * newArray = iBuilder->CreateAlignedMalloc(newCapacitySize, iBuilder->getCacheAlignment());
221        iBuilder->CreateMemCpy(newArray, array, capacitySize, BlockWidth);
222        iBuilder->CreateMemZero(iBuilder->CreateGEP(newArray, capacitySize), capacitySize, BlockWidth);
223        iBuilder->CreateAlignedFree(array);
224        newArray = iBuilder->CreatePointerCast(newArray, array->getType());
225        iBuilder->CreateStore(newArray, arrayPtr);
226
227        Value * const log2capacity = iBuilder->CreateAdd(iBuilder->CreateCeilLog2(capacity), ONE);
228        Value * const summarySize = iBuilder->CreateShl(log2capacity, Log2BlockWidth + 1); // x 2(BlockWidth)
229        Value * const newLog2Capacity = iBuilder->CreateAdd(log2capacity, ONE);
230        Value * const newSummarySize = iBuilder->CreateShl(newLog2Capacity, Log2BlockWidth + 1); // x 2(BlockWidth)
231
232        Value * const summary = iBuilder->CreateLoad(summaryPtr, false);
233        Value * newSummary = iBuilder->CreateAlignedMalloc(newSummarySize, BlockWidth);
234        iBuilder->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
235        iBuilder->CreateMemZero(iBuilder->CreateGEP(newSummary, summarySize), iBuilder->getSize(2 * BlockWidth), BlockWidth);
236        iBuilder->CreateAlignedFree(summary);
237
238        Value * ptr1 = iBuilder->CreateGEP(newSummary, summarySize);
239        ptr1 = iBuilder->CreatePointerCast(ptr1, getCarryPackType()->getPointerTo());
240
241        Value * ptr2 = iBuilder->CreateGEP(newSummary, iBuilder->CreateAdd(summarySize, iBuilder->getSize(BlockWidth)));
242        ptr2 = iBuilder->CreatePointerCast(ptr2, getCarryPackType()->getPointerTo());
243
244        newSummary = iBuilder->CreatePointerCast(newSummary, getCarryPackType()->getPointerTo());
245        iBuilder->CreateStore(newSummary, summaryPtr);
246        Value * const newCapacity = iBuilder->CreateShl(ONE, log2capacity);
247
248        iBuilder->CreateStore(newCapacity, capacityPtr);
249
250        iBuilder->CreateBr(resumeKernel);
251
252        // CREATE NEW
253        iBuilder->SetInsertPoint(createNew);
254
255        Constant * const initialLog2Capacity = iBuilder->getInt64(4);
256        Constant * const initialCapacity = ConstantExpr::getShl(ONE, initialLog2Capacity);
257        Constant * const initialCapacitySize = ConstantExpr::getMul(initialCapacity, carryStateWidth);
258
259        Value * initialArray = iBuilder->CreateAlignedMalloc(initialCapacitySize, iBuilder->getCacheAlignment());
260        iBuilder->CreateMemZero(initialArray, initialCapacitySize, BlockWidth);
261        initialArray = iBuilder->CreatePointerCast(initialArray, array->getType());
262        iBuilder->CreateStore(initialArray, arrayPtr);
263
264        Constant * initialSummarySize = ConstantExpr::getShl(ConstantExpr::getAdd(initialLog2Capacity, iBuilder->getInt64(1)), iBuilder->getInt64(Log2BlockWidth + 1));
265        Value * initialSummary = iBuilder->CreateAlignedMalloc(initialSummarySize, BlockWidth);
266        iBuilder->CreateMemZero(initialSummary, initialSummarySize, BlockWidth);
267        initialSummary = iBuilder->CreatePointerCast(initialSummary, getCarryPackType()->getPointerTo());
268        iBuilder->CreateStore(initialSummary, summaryPtr);
269
270        iBuilder->CreateStore(initialCapacity, capacityPtr);
271
272        iBuilder->CreateBr(resumeKernel);
273
274        // RESUME KERNEL
275        iBuilder->SetInsertPoint(resumeKernel);
276        // Load the appropriate carry stat block
277        PHINode * phiArrayPtr = iBuilder->CreatePHI(array->getType(), 3);
278        phiArrayPtr->addIncoming(array, entry);
279        phiArrayPtr->addIncoming(initialArray, createNew);
280        phiArrayPtr->addIncoming(newArray, reallocExisting);
281
282        // NOTE: the 3 here is only to pass the assertion later. It refers to the number of elements in the carry data struct.
283        mCarryFrameStack.emplace_back(mCurrentFrame, 3);
284        mCurrentFrame = iBuilder->CreateGEP(phiArrayPtr, index);
285    }
286}
287
288/** ------------------------------------------------------------------------------------------------------------- *
289 * @brief leaveLoopBody
290 ** ------------------------------------------------------------------------------------------------------------- */
291void CarryManager::leaveLoopBody(BasicBlock * /* exitBlock */) {
292
293    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
294
295        assert (mCarryInfo->hasSummary());
296
297        ConstantInt * const summaryIndex = iBuilder->getInt32(mCarryInfo->hasExplicitSummary() ? mCurrentFrameIndex : (mCurrentFrameIndex - 1));
298
299        Value * const carryInAccumulator = readCarryInSummary(summaryIndex);
300        Value * const carryOutAccumulator = mCarrySummaryStack.back();
301
302        if (mCarryInfo->hasExplicitSummary()) {
303            writeCarryOutSummary(carryOutAccumulator, summaryIndex);
304        }
305
306        std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
307        mCarryFrameStack.pop_back();
308
309        // In non-carry-collapsing mode, we cannot rely on the fact that performing a single iteration of this
310        // loop will consume all of the incoming carries from the prior block. We need to subtract the carries
311        // consumed by this iteration from our carry summary state. To do so in parallel, we use the the half-
312        // subtractor circuit to do it in ceil log2 steps. Similarly, we compute our carry out summary state
313        // (for the subsequent block to subtract) using a half-adder circuit.
314
315        // NOTE: this requires that, for all loop iterations, i, and all block iterations, j, the carry in
316        // summary, CI_i,j, matches the carry out summary of the prior block iteration, CO_i,j - 1.
317        // Otherwise we may end up with an incorrect result or being trapped in an infinite loop.
318
319        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
320        Value * capacity = iBuilder->CreateLoad(capacityPtr, false);
321        Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
322        Value * summary = iBuilder->CreateLoad(summaryPtr, false);
323
324        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
325
326        Value * loopSelector = iBuilder->CreateZExt(mLoopSelector, capacity->getType());
327
328        BasicBlock * entry = iBuilder->GetInsertBlock();
329        BasicBlock * update = mKernel->CreateBasicBlock("UpdateNonCarryCollapsingSummary");
330        BasicBlock * resume = mKernel->CreateBasicBlock("ResumeAfterUpdatingNonCarryCollapsingSummary");
331
332        iBuilder->CreateBr(update);
333
334        iBuilder->SetInsertPoint(update);
335        PHINode * i = iBuilder->CreatePHI(capacity->getType(), 2);
336        i->addIncoming(ConstantInt::getNullValue(capacity->getType()), entry);
337        PHINode * const borrow = iBuilder->CreatePHI(carryInAccumulator->getType(), 2);
338        borrow->addIncoming(carryInAccumulator, entry);
339        PHINode * const carry = iBuilder->CreatePHI(carryOutAccumulator->getType(), 2);
340        carry->addIncoming(carryOutAccumulator, entry);
341        // OR the updated carry in summary later for the summaryTest
342        PHINode * const carryInSummary = iBuilder->CreatePHI(getCarryPackType(), 2);
343        carryInSummary->addIncoming(Constant::getNullValue(getCarryPackType()), entry);
344
345        // half subtractor
346        Value * const carryInOffset = iBuilder->CreateOr(iBuilder->CreateShl(i, 1), loopSelector);
347        Value * const carryInPtr = iBuilder->CreateGEP(summary, carryInOffset);
348        Value * const carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
349        Value * const nextCarryIn = iBuilder->CreateXor(carryIn, borrow);
350        Value * const nextSummary = iBuilder->CreateOr(carryInSummary, nextCarryIn);
351        iBuilder->CreateBlockAlignedStore(nextCarryIn, carryInPtr);
352        carryInSummary->addIncoming(nextSummary, update);
353        Value * finalBorrow = iBuilder->CreateAnd(iBuilder->CreateNot(carryIn), borrow);
354        borrow->addIncoming(finalBorrow, update);
355
356        // half adder
357        Value * const carryOutOffset = iBuilder->CreateXor(carryInOffset, ConstantInt::get(carryInOffset->getType(), 1));
358        Value * const carryOutPtr = iBuilder->CreateGEP(summary, carryOutOffset);
359        Value * const carryOut = iBuilder->CreateBlockAlignedLoad(carryOutPtr);
360        Value * const nextCarryOut = iBuilder->CreateXor(carryOut, carry);
361        iBuilder->CreateBlockAlignedStore(nextCarryOut, carryOutPtr);
362        Value * finalCarry = iBuilder->CreateAnd(carryOut, carry);
363        carry->addIncoming(finalCarry, update);
364
365        // loop condition
366        i->addIncoming(iBuilder->CreateAdd(i, ONE), update);
367        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(iBuilder->CreateShl(ONE, i), capacity), update, resume);
368
369        iBuilder->SetInsertPoint(resume);
370
371        IntegerType * ty = IntegerType::get(iBuilder->getContext(), getCarryPackType()->getPrimitiveSizeInBits());
372        iBuilder->CreateAssert(iBuilder->CreateICmpEQ(iBuilder->CreateBitCast(finalBorrow, ty), ConstantInt::getNullValue(ty)), "finalBorrow != 0");
373        iBuilder->CreateAssert(iBuilder->CreateICmpEQ(iBuilder->CreateBitCast(finalCarry, ty), ConstantInt::getNullValue(ty)), "finalCarry != 0");
374
375        assert (!mLoopIndicies.empty());
376        PHINode * index = mLoopIndicies.back();
377        index->addIncoming(iBuilder->CreateAdd(index, iBuilder->getSize(1)), resume);
378        mLoopIndicies.pop_back();
379
380        mNextSummaryTest = nextSummary;
381    }
382    if (mCarryInfo->hasSummary()) {
383        const auto n = mCarrySummaryStack.size(); assert (n > 1);
384        Value * carryOut = mCarrySummaryStack.back();
385        mCarrySummaryStack.pop_back();
386        PHINode * phiCarryOut = cast<PHINode>(mCarrySummaryStack.back());
387        phiCarryOut->addIncoming(carryOut, iBuilder->GetInsertBlock());
388        // If we're returning to the base scope, reset our accumulated summary value.
389        if (n == 2) {
390            carryOut = Constant::getNullValue(getCarryPackType());
391        }
392        mCarrySummaryStack.back() = carryOut;
393    }
394}
395
396/** ------------------------------------------------------------------------------------------------------------- *
397 * @brief leaveLoopScope
398 ** ------------------------------------------------------------------------------------------------------------- */
399void CarryManager::leaveLoopScope(BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
400    assert (mLoopDepth > 0);
401    --mLoopDepth;
402    leaveScope();
403}
404
405/** ------------------------------------------------------------------------------------------------------------- *
406 * @brief enterIfScope
407 ** ------------------------------------------------------------------------------------------------------------- */
408void CarryManager::enterIfScope(const PabloBlock * const scope) {
409    ++mIfDepth;
410    enterScope(scope);
411    // We zero-initialized the nested summary value and later OR in the current summary into the escaping summary
412    // so that upon processing the subsequent block iteration, we branch into this If scope iff a carry out was
413    // generated by a statement within this If scope and not by a dominating statement in the outer scope.
414    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
415        assert (mCurrentFrameIndex == 0);
416        mNextSummaryTest = readCarryInSummary(iBuilder->getInt32(0));
417        if (mCarryInfo->hasExplicitSummary()) {
418            mCurrentFrameIndex = 1;
419        }
420    }
421    mCarrySummaryStack.push_back(Constant::getNullValue(getCarryPackType()));
422}
423
424/** ------------------------------------------------------------------------------------------------------------- *
425 * @brief generateSummaryTest
426 ** ------------------------------------------------------------------------------------------------------------- */
427Value * CarryManager::generateSummaryTest(Value * condition) {
428    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
429        assert ("summary test was not generated" && mNextSummaryTest);
430        condition = iBuilder->simd_or(condition, mNextSummaryTest);
431        mNextSummaryTest = nullptr;
432    }
433    assert ("summary test was not consumed" && (mNextSummaryTest == nullptr));
434    return condition;
435}
436
437/** ------------------------------------------------------------------------------------------------------------- *
438 * @brief enterIfBody
439 ** ------------------------------------------------------------------------------------------------------------- */
440void CarryManager::enterIfBody(BasicBlock * const entryBlock) {
441    assert (entryBlock);
442}
443
444/** ------------------------------------------------------------------------------------------------------------- *
445 * @brief leaveIfBody
446 ** ------------------------------------------------------------------------------------------------------------- */
447void CarryManager::leaveIfBody(BasicBlock * const exitBlock) {
448    assert (exitBlock);
449    const auto n = mCarrySummaryStack.size();
450    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
451        writeCarryOutSummary(mCarrySummaryStack[n - 1], iBuilder->getInt32(0));
452    }
453    if (n > 2) {
454        mCarrySummaryStack[n - 1] = iBuilder->CreateOr(mCarrySummaryStack[n - 1], mCarrySummaryStack[n - 2], "summary");
455    }
456}
457
458/** ------------------------------------------------------------------------------------------------------------- *
459 * @brief leaveIfScope
460 ** ------------------------------------------------------------------------------------------------------------- */
461void CarryManager::leaveIfScope(BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
462    assert (mIfDepth > 0);
463    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
464        const auto n = mCarrySummaryStack.size(); assert (n > 0);
465        if (n > 2) {
466            // When leaving a nested If scope with a summary value, phi out the summary to ensure the
467            // appropriate summary is stored in the outer scope.
468            Value * nested = mCarrySummaryStack[n - 1];
469            Value * outer = mCarrySummaryStack[n - 2];
470            assert (nested->getType() == outer->getType());
471            PHINode * const phi = iBuilder->CreatePHI(nested->getType(), 2, "summary");
472            phi->addIncoming(outer, entryBlock);
473            phi->addIncoming(nested, exitBlock);
474            mCarrySummaryStack[n - 2] = phi;
475        }
476    }
477    --mIfDepth;
478    leaveScope();
479    mCarrySummaryStack.pop_back();
480}
481
482/** ------------------------------------------------------------------------------------------------------------ *
483 * @brief enterScope
484 ** ------------------------------------------------------------------------------------------------------------- */
485void CarryManager::enterScope(const PabloBlock * const scope) {
486    assert (scope);
487    // Store the state of the current frame and update the scope state
488    mCarryFrameStack.emplace_back(mCurrentFrame, mCurrentFrameIndex + 1);
489    mCurrentScope = scope;
490    mCarryScopeIndex.push_back(++mCarryScopes);
491    mCarryInfo = &mCarryMetadata[mCarryScopes];
492    // Check whether we're still within our struct bounds; if this fails, either the Pablo program changed during
493    // compilation or a memory corruption has occured.
494    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
495    mCurrentFrame = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
496    // Verify we're pointing to a carry frame struct
497    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
498    mCurrentFrameIndex = 0;
499}
500
501/** ------------------------------------------------------------------------------------------------------------- *
502 * @brief leaveScope
503 ** ------------------------------------------------------------------------------------------------------------- */
504void CarryManager::leaveScope() {
505
506    // Did we use all of the packs in this carry struct?
507    assert (mCurrentFrameIndex == mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
508    // Sanity test: are there remaining carry frames?
509    assert (!mCarryFrameStack.empty());
510
511    std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
512
513    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
514
515    mCarryFrameStack.pop_back();
516    mCarryScopeIndex.pop_back();
517    assert (!mCarryScopeIndex.empty());
518    mCurrentScope = mCurrentScope->getPredecessor();
519    assert (mCurrentScope);
520    mCarryInfo = &mCarryMetadata[mCarryScopeIndex.back()];
521}
522
523/** ------------------------------------------------------------------------------------------------------------- *
524 * @brief addCarryInCarryOut
525 ** ------------------------------------------------------------------------------------------------------------- */
526Value * CarryManager::addCarryInCarryOut(const Statement * const operation, Value * const e1, Value * const e2) {
527    assert (operation && (isNonAdvanceCarryGeneratingStatement(operation)));
528    Value * const carryIn = getNextCarryIn();
529    Value * carryOut, * result;
530    std::tie(carryOut, result) = iBuilder->bitblock_add_with_carry(e1, e2, carryIn);
531    setNextCarryOut(carryOut);
532    assert (result->getType() == getBitBlockType());
533    return result;
534}
535
536/** ------------------------------------------------------------------------------------------------------------- *
537 * @brief advanceCarryInCarryOut
538 ** ------------------------------------------------------------------------------------------------------------- */
539Value * CarryManager::advanceCarryInCarryOut(const Advance * const advance, Value * const value) {
540    const auto shiftAmount = advance->getAmount();
541    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
542        Value * const carryIn = getNextCarryIn();
543        Value * carryOut, * result;
544        std::tie(carryOut, result) = iBuilder->bitblock_advance(value, carryIn, shiftAmount);
545        setNextCarryOut(carryOut);
546        assert (result->getType() == getBitBlockType());
547        return result;
548    } else {
549        return longAdvanceCarryInCarryOut(value, shiftAmount);
550    }
551}
552
553/** ------------------------------------------------------------------------------------------------------------- *
554 * @brief longAdvanceCarryInCarryOut
555 ** ------------------------------------------------------------------------------------------------------------- */
556inline Value * CarryManager::longAdvanceCarryInCarryOut(Value * const value, const unsigned shiftAmount) {
557
558    assert (mHasLongAdvance);
559    assert (shiftAmount >= LONG_ADVANCE_BREAKPOINT);
560
561    Type * const streamTy = iBuilder->getIntNTy(getBitBlockWidth());
562
563    if (mIfDepth > 0) {
564        if (shiftAmount > getBitBlockWidth()) {
565            const auto frameIndex = mCurrentFrameIndex++;
566            Value * carry = iBuilder->CreateZExt(iBuilder->bitblock_any(value), streamTy);
567            const unsigned summarySize = ceil_udiv(shiftAmount, getBitBlockWidth() * getBitBlockWidth());
568            for (unsigned i = 0;;++i) {
569                Value * const ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(frameIndex), iBuilder->getInt32(i)});
570                Value * const prior = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(ptr), streamTy);
571                Value * const stream = iBuilder->CreateBitCast(iBuilder->CreateOr(iBuilder->CreateShl(prior, 1), carry), getBitBlockType());
572                if (LLVM_LIKELY(i == summarySize)) {
573                    Value * const maskedStream = iBuilder->CreateAnd(stream, iBuilder->bitblock_mask_from(iBuilder->getInt32(summarySize % getBitBlockWidth())));
574                    addToCarryOutSummary(maskedStream);
575                    iBuilder->CreateBlockAlignedStore(maskedStream, ptr);
576                    break;
577                }
578                addToCarryOutSummary(stream);
579                iBuilder->CreateBlockAlignedStore(stream, ptr);
580                carry = iBuilder->CreateLShr(prior, getBitBlockWidth() - 1);
581            }
582        } else if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
583            addToCarryOutSummary(value);
584        }
585    }
586    const auto frameIndex = mCurrentFrameIndex++;
587    // special case using a single buffer entry and the carry_out value.
588    if (LLVM_LIKELY((shiftAmount < getBitBlockWidth()) && (mLoopDepth == 0))) {
589        Value * const buffer = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(frameIndex), iBuilder->getInt32(0)});
590        assert (buffer->getType()->getPointerElementType() == getBitBlockType());
591        Value * carryIn = iBuilder->CreateBlockAlignedLoad(buffer);       
592        iBuilder->CreateBlockAlignedStore(value, buffer);
593        /* Very special case - no combine */
594        if (LLVM_UNLIKELY(shiftAmount == getBitBlockWidth())) {
595            return iBuilder->CreateBitCast(carryIn, getBitBlockType());
596        }
597        Value* block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamTy), getBitBlockWidth() - shiftAmount);
598        Value* block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(value, streamTy), shiftAmount);
599        return iBuilder->CreateBitCast(iBuilder->CreateOr(block1_shl, block0_shr), getBitBlockType());
600    } else { //
601        const unsigned blockShift = shiftAmount % getBitBlockWidth();
602        const unsigned blocks = ceil_udiv(shiftAmount, getBitBlockWidth());
603        // Create a mask to implement circular buffer indexing
604        Value * indexMask = iBuilder->getSize(nearest_pow2(blocks + ((mLoopDepth != 0) ? 1 : 0)) - 1);
605        Value * blockIndex = mKernel->getScalarField("CarryBlockIndex");
606        Value * carryIndex0 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(blocks));
607        Value * loadIndex0 = iBuilder->CreateAnd(carryIndex0, indexMask);
608        Value * const carryInPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(frameIndex), loadIndex0});
609        Value * carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
610
611        Value * storeIndex = iBuilder->CreateAnd(blockIndex, indexMask);
612        Value * const carryOutPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(frameIndex), storeIndex});
613        assert (carryIn->getType() == getBitBlockType());
614
615        // If the long advance is an exact multiple of BitBlockWidth, we simply return the oldest
616        // block in the long advance carry data area.
617        if (LLVM_UNLIKELY(blockShift == 0)) {
618            iBuilder->CreateBlockAlignedStore(value, carryOutPtr);
619            return carryIn;
620        } else { // Otherwise we need to combine data from the two oldest blocks.
621            Value * carryIndex1 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(blocks - 1));
622            Value * loadIndex1 = iBuilder->CreateAnd(carryIndex1, indexMask);
623            Value * const carryInPtr2 = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(frameIndex), loadIndex1});
624            Value * carry_block1 = iBuilder->CreateBlockAlignedLoad(carryInPtr2);
625            Value * block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamTy), getBitBlockWidth() - blockShift);
626            Value * block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(carry_block1, streamTy), blockShift);
627            iBuilder->CreateBlockAlignedStore(value, carryOutPtr);
628            return iBuilder->CreateBitCast(iBuilder->CreateOr(block1_shl, block0_shr), getBitBlockType());
629        }
630    }
631}
632
633/** ------------------------------------------------------------------------------------------------------------- *
634 * @brief getNextCarryIn
635 ** ------------------------------------------------------------------------------------------------------------- */
636Value * CarryManager::getNextCarryIn() {
637    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
638    if (mLoopDepth == 0) {
639        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
640    } else {
641        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mLoopSelector});
642    }
643    assert (mCarryPackPtr->getType()->getPointerElementType() == getCarryPackType());
644    Value * const carryIn = iBuilder->CreateBlockAlignedLoad(mCarryPackPtr);
645    if (mLoopDepth > 0) {
646        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(getCarryPackType()), mCarryPackPtr);
647    }
648    return carryIn;
649}
650
651/** ------------------------------------------------------------------------------------------------------------- *
652 * @brief setNextCarryOut
653 ** ------------------------------------------------------------------------------------------------------------- */
654void CarryManager::setNextCarryOut(Value * carryOut) {
655    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
656    carryOut = iBuilder->CreateBitCast(carryOut, getCarryPackType());
657    if (mCarryInfo->hasSummary()) {
658        addToCarryOutSummary(carryOut);
659    }
660    if (mLoopDepth != 0) {
661        mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mNextLoopSelector});
662        if (LLVM_LIKELY(!mCarryInfo->nonCarryCollapsingMode())) {
663            Value * accum = iBuilder->CreateBlockAlignedLoad(mCarryPackPtr);
664            carryOut = iBuilder->CreateOr(carryOut, accum);
665        }
666    }
667    ++mCurrentFrameIndex;
668    assert (mCarryPackPtr->getType()->getPointerElementType() == getCarryPackType());
669    iBuilder->CreateBlockAlignedStore(carryOut, mCarryPackPtr);
670}
671
672/** ------------------------------------------------------------------------------------------------------------- *
673 * @brief readCarryInSummary
674 ** ------------------------------------------------------------------------------------------------------------- */
675Value * CarryManager::readCarryInSummary(ConstantInt * index) const {
676    assert (mCarryInfo->hasSummary());
677
678    unsigned count = 2;
679    if (LLVM_UNLIKELY(mCarryInfo->hasBorrowedSummary())) {
680        Type * frameTy = mCurrentFrame->getType()->getPointerElementType();
681        count = 1;
682        while (frameTy->isStructTy()) {
683            ++count;
684            frameTy = frameTy->getStructElementType(0);
685        }
686    }
687    const unsigned length = (mLoopDepth == 0) ? count : (count + 1);
688    Value * indicies[length];
689    std::fill(indicies, indicies + count - 1, iBuilder->getInt32(0));
690    indicies[count - 1] = index;
691    if (mLoopDepth != 0) {
692        indicies[count] = mLoopSelector;
693    }
694
695    ArrayRef<Value *> ar(indicies, length);
696    Value * const ptr = iBuilder->CreateGEP(mCurrentFrame, ar);
697    Value * const summary = iBuilder->CreateBlockAlignedLoad(ptr);
698    if (mLoopDepth != 0 && mCarryInfo->hasExplicitSummary()) {
699        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(getCarryPackType()), ptr);
700    }
701    return summary;
702}
703
704/** ------------------------------------------------------------------------------------------------------------- *
705 * @brief writeCarryOutSummary
706 ** ------------------------------------------------------------------------------------------------------------- */
707inline void CarryManager::writeCarryOutSummary(Value * const summary, ConstantInt * index) const {
708    Value * ptr = nullptr;
709    assert (mCarryInfo->hasExplicitSummary());
710    if (mLoopDepth > 0) {
711        ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index, mNextLoopSelector});
712    } else {
713        ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index});
714    }
715    iBuilder->CreateBlockAlignedStore(summary, ptr);
716}
717
718/** ------------------------------------------------------------------------------------------------------------- *
719 * @brief addToCarryOutSummary
720 ** ------------------------------------------------------------------------------------------------------------- */
721inline void CarryManager::addToCarryOutSummary(Value * const value) {
722    assert ("cannot add null summary value!" && value);   
723    assert ("summary stack is empty!" && !mCarrySummaryStack.empty());
724    assert (mCarryInfo->hasSummary());
725    mCarrySummaryStack.back() = iBuilder->CreateOr(value, mCarrySummaryStack.back());
726}
727
728/** ------------------------------------------------------------------------------------------------------------- *
729 * @brief enumerate
730 ** ------------------------------------------------------------------------------------------------------------- */
731unsigned CarryManager::getScopeCount(PabloBlock * const scope, unsigned index) {
732    for (Statement * stmt : *scope) {
733        if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
734            index = getScopeCount(cast<Branch>(stmt)->getBody(), index);
735        }
736    }
737    return index + 1;
738}
739
740/** ------------------------------------------------------------------------------------------------------------- *
741 * @brief hasIterationSpecificAssignment
742 ** ------------------------------------------------------------------------------------------------------------- */
743bool CarryManager::hasIterationSpecificAssignment(const PabloBlock * const scope) {
744    if (const While * const br = dyn_cast_or_null<While>(scope->getBranch())) {
745        for (const Var * var : br->getEscaped()) {
746            for (const PabloAST * user : var->users()) {
747                if (const Extract * e = dyn_cast<Extract>(user)) {
748                    if (LLVM_UNLIKELY(e->getIndex() == var)) {
749                        // If we assign this Var a value and read the value as the index parameter
750                        // of a nested Extract statement, then we cannot collapse the carries.
751                        const PabloBlock * parent = e->getParent();
752                        for (;;) {
753                            if (parent == scope) {
754                                return true;
755                            }
756                            parent = parent->getPredecessor();
757                            if (parent == nullptr) {
758                                break;
759                            }
760                        }
761                    }
762                }
763            }
764        }
765    }
766    return false;
767}
768
769/** ------------------------------------------------------------------------------------------------------------- *
770 * @brief analyse
771 ** ------------------------------------------------------------------------------------------------------------- */
772StructType * CarryManager::analyse(PabloBlock * const scope, const unsigned ifDepth, const unsigned loopDepth, const bool isNestedWithinNonCarryCollapsingLoop) {
773
774    assert (scope != mKernel->getEntryBlock() || mCarryScopes == 0);
775    assert (mCarryScopes < mCarryMetadata.size());
776    assert (getCarryPackType());
777
778    const unsigned carryScopeIndex = mCarryScopes++;
779    const bool nonCarryCollapsingMode = hasIterationSpecificAssignment(scope);
780    Type * const carryPackType = (loopDepth == 0) ? getCarryPackType() : ArrayType::get(getCarryPackType(), 2);
781    std::vector<Type *> state;
782
783    for (Statement * stmt : *scope) {
784        if (LLVM_UNLIKELY(isa<Advance>(stmt))) {
785            const auto amount = cast<Advance>(stmt)->getAmount();
786            Type * type = carryPackType;
787            if (LLVM_UNLIKELY(amount >= LONG_ADVANCE_BREAKPOINT)) {
788                const unsigned blocks = ceil_udiv(amount, getBitBlockWidth());
789                type = ArrayType::get(getBitBlockType(), nearest_pow2(blocks + ((loopDepth != 0) ? 1 : 0)));
790                if (LLVM_UNLIKELY(ifDepth > 0 && amount > getBitBlockWidth())) {
791                    // 1 bit will mark the presense of any bit in each block.
792                    Type * carryType = ArrayType::get(getBitBlockType(), ceil_udiv(amount, getBitBlockWidth() * getBitBlockWidth()));
793                    state.push_back(carryType);
794                }
795                mHasLongAdvance = true;               
796            }
797            state.push_back(type);
798        } else if (LLVM_UNLIKELY(isNonAdvanceCarryGeneratingStatement(stmt))) {
799            state.push_back(carryPackType);
800        } else if (LLVM_UNLIKELY(isa<If>(stmt))) {
801            state.push_back(analyse(cast<If>(stmt)->getBody(), ifDepth + 1, loopDepth, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
802        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
803            mHasLoop = true;
804            state.push_back(analyse(cast<While>(stmt)->getBody(), ifDepth, loopDepth + 1, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
805        }
806    }
807    // Build the carry state struct and add the summary pack if needed.
808    CarryData & cd = mCarryMetadata[carryScopeIndex];
809    StructType * carryState = nullptr;
810    CarryData::SummaryType summaryType = CarryData::NoSummary;
811    if (LLVM_UNLIKELY(state.empty())) {
812        carryState = StructType::get(iBuilder->getContext());
813    } else {
814        //if (ifDepth > 0 || (nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop)) {
815        if (dyn_cast_or_null<If>(scope->getBranch()) || nonCarryCollapsingMode || isNestedWithinNonCarryCollapsingLoop) {
816            if (LLVM_LIKELY(state.size() > 1)) {
817                summaryType = CarryData::ExplicitSummary;
818                // NOTE: summaries are stored differently depending whether we're entering an If or While branch. With an If branch, they
819                // preceed the carry state data and with a While loop they succeed it. This is to help cache prefectching performance.
820                state.insert(isa<If>(scope->getBranch()) ? state.begin() : state.end(), carryPackType);
821            } else {
822                summaryType = CarryData::ImplicitSummary;
823                if (state[0]->isStructTy()) {
824                    summaryType = CarryData::BorrowedSummary;
825                }
826            }           
827        }
828        carryState = StructType::get(iBuilder->getContext(), state);
829        // If we're in a loop and cannot use collapsing carry mode, convert the carry state struct into a capacity,
830        // carry state pointer, and summary pointer struct.
831        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
832            carryState = StructType::get(iBuilder->getSizeTy(), carryState->getPointerTo(), getCarryPackType()->getPointerTo(), nullptr);
833        }
834        cd.setNonCollapsingCarryMode(nonCarryCollapsingMode);
835    }
836    cd.setSummaryType(summaryType);
837    return carryState;
838}
839
840inline unsigned CarryManager::getBitBlockWidth() const {
841    return iBuilder->getBitBlockWidth();
842}
843
844inline llvm::Type * CarryManager::getCarryPackType() const {
845    return getBitBlockType();
846}
847
848inline llvm::Type * CarryManager::getBitBlockType() const {
849    return iBuilder->getBitBlockType();
850}
851
852/** ------------------------------------------------------------------------------------------------------------- *
853 * @brief constructor
854 ** ------------------------------------------------------------------------------------------------------------- */
855CarryManager::CarryManager(PabloKernel * const kernel) noexcept
856: mKernel(kernel)
857, iBuilder(nullptr)
858, mCurrentFrame(nullptr)
859, mCurrentFrameIndex(0)
860, mCurrentScope(nullptr)
861, mCarryInfo(nullptr)
862, mNextSummaryTest(nullptr)
863, mIfDepth(0)
864, mHasLongAdvance(false)
865, mHasLoop(false)
866, mLoopDepth(0)
867, mLoopSelector(nullptr)
868, mNextLoopSelector(nullptr)
869, mCarryPackPtr(nullptr)
870, mCarryScopes(0) {
871
872}
873
874}
Note: See TracBrowser for help on using the repository browser.