source: icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp @ 5368

Last change on this file since 5368 was 5368, checked in by nmedfort, 3 years ago

Work on non carry collapsing mode. Beginning work on pablo-level phi nodes.

File size: 41.8 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "carry_manager.h"
8#include <pablo/carry_data.h>
9#include <pablo/codegenstate.h>
10#include <llvm/IR/BasicBlock.h>
11#include <IR_Gen/idisa_builder.h>
12#include <llvm/IR/DerivedTypes.h>
13#include <pablo/branch.h>
14#include <pablo/pe_advance.h>
15#include <pablo/pe_scanthru.h>
16#include <pablo/pe_matchstar.h>
17#include <pablo/pe_var.h>
18
19using namespace llvm;
20
21namespace pablo {
22
23inline static unsigned floor_log2(const unsigned v) {
24    assert ("log2(0) is undefined!" && v != 0);
25    return 31 - __builtin_clz(v);
26}
27
28inline static unsigned nearest_pow2(const uint32_t v) {
29    assert(v > 0 && v < (UINT32_MAX / 2));
30    return (v < 2) ? 1 : (1 << (32 - __builtin_clz(v - 1)));
31}
32
33inline static unsigned ceil_udiv(const unsigned x, const unsigned y) {
34    return (((x - 1) | (y - 1)) + 1) / y;
35}
36
37using TypeId = PabloAST::ClassTypeId;
38
39inline static bool isNonAdvanceCarryGeneratingStatement(const Statement * const stmt) {
40    switch (stmt->getClassTypeId()) {
41        case TypeId::ScanThru:
42        case TypeId::AdvanceThenScanThru:
43        case TypeId::ScanTo:
44        case TypeId::AdvanceThenScanTo:
45        case TypeId::MatchStar:
46            return true;
47        default:
48            return false;
49    }
50}
51
52/** ------------------------------------------------------------------------------------------------------------- *
53 * @brief initializeCarryData
54 ** ------------------------------------------------------------------------------------------------------------- */
55void CarryManager::initializeCarryData(PabloKernel * const kernel) {
56
57    // Each scope constructs its own CarryData struct, which will be added to the final "carries" struct
58    // that is added to the Kernel. The scope index will indicate which struct to access.
59
60    // A CarryData struct either contains an array of CarryPackBlocks or an integer indicating the capacity of
61    // the variable length CarryData struct and pointer. A variable length CarryData struct is required whenever
62    // the streams accessed by a loop could vary between iterations. When resizing a CarryData struct for a
63    // particular loop, the current loop struct and all nested structs need to be resized. This accommodates
64    // the fact every pablo While loop must be executed at least once.
65
66    // A nested loop may also contain a variable length CarryData struct
67
68    // To determine whether we require a variable length CarryData struct, we test the escaped variables of
69    // each loop branch to see whether they are used as the index parameter of a nested Extract statement.
70    // Any scope that requires variable length CarryData, requires that all nested branches have a unique
71    // set of carries for that iteration.
72
73    mKernel = kernel;
74
75    mCurrentScope = kernel->getEntryBlock();
76
77    mCarryScopes = 0;
78
79    mCarryMetadata.resize(getScopeCount(kernel->getEntryBlock()));
80
81    Type * ty = analyse(kernel->getEntryBlock());
82
83    mKernel->addScalar(ty, "carries");
84
85    if (mHasLoop) {
86        mKernel->addScalar(iBuilder->getInt32Ty(), "selector");
87    }
88    if (mHasLongAdvance) {
89        mKernel->addScalar(iBuilder->getSizeTy(), "CarryBlockIndex");
90    }
91}
92
93/** ------------------------------------------------------------------------------------------------------------- *
94 * @brief initializeCodeGen
95 ** ------------------------------------------------------------------------------------------------------------- */
96void CarryManager::initializeCodeGen() {
97
98    assert(!mCarryMetadata.empty());
99    mCarryInfo = &mCarryMetadata[0];
100    assert (!mCarryInfo->hasSummary());
101
102    mCurrentFrame = mKernel->getScalarFieldPtr("carries");
103    mCurrentFrameIndex = 0;
104    mCarryScopes = 0;
105    mCarryScopeIndex.push_back(0);
106
107    assert (mCarryFrameStack.empty());
108
109    assert (mCarrySummaryStack.empty());
110    mCarrySummaryStack.push_back(Constant::getNullValue(mCarryPackType));
111
112    if (mHasLoop) {
113        mLoopSelector = mKernel->getScalarField("selector");
114        mNextLoopSelector = iBuilder->CreateXor(mLoopSelector, ConstantInt::get(mLoopSelector->getType(), 1));
115    }
116}
117
118/** ------------------------------------------------------------------------------------------------------------- *
119 * @brief finalizeCodeGen
120 ** ------------------------------------------------------------------------------------------------------------- */
121void CarryManager::finalizeCodeGen() {
122    if (mHasLoop) {
123        mKernel->setScalarField("selector", mNextLoopSelector);
124    }
125    if (mHasLongAdvance) {
126        Value * idx = mKernel->getScalarField("CarryBlockIndex");
127        idx = iBuilder->CreateAdd(idx, iBuilder->getSize(1));
128        mKernel->setScalarField("CarryBlockIndex", idx);
129    }
130    assert (mCarryFrameStack.empty());   
131    assert (mCarrySummaryStack.size() == 1);
132    assert ("base summary value was overwritten!" && isa<Constant>(mCarrySummaryStack[0]) && cast<Constant>(mCarrySummaryStack[0])->isNullValue());
133    mCarrySummaryStack.clear();
134
135    assert (mCarryScopeIndex.size() == 1);
136    mCarryScopeIndex.clear();
137}
138
139/** ------------------------------------------------------------------------------------------------------------- *
140 * @brief enterLoopScope
141 ** ------------------------------------------------------------------------------------------------------------- */
142void CarryManager::enterLoopScope(const PabloBlock * const scope) {
143    assert (scope);
144    assert (mHasLoop);
145    ++mLoopDepth;
146    enterScope(scope);
147}
148
149/** ------------------------------------------------------------------------------------------------------------- *
150 * @brief enterLoopBody
151 ** ------------------------------------------------------------------------------------------------------------- */
152void CarryManager::enterLoopBody(BasicBlock * const entryBlock) {
153    if (mCarryInfo->hasSummary()) {
154        PHINode * phiCarryOutSummary = iBuilder->CreatePHI(mCarryPackType, 2, "summary");
155        assert (!mCarrySummaryStack.empty());
156        phiCarryOutSummary->addIncoming(mCarrySummaryStack.back(), entryBlock);
157        // Replace the incoming carry summary with the phi node and add the phi node to the stack  so that we can
158        // properly OR it into the outgoing summary value.
159        // NOTE: this may change the base summary value; when exiting to the base scope, replace this summary with
160        // a null value to prevent subsequent nested scopes from inheriting the summary of this scope.
161        mCarrySummaryStack.back() = phiCarryOutSummary;
162        mCarrySummaryStack.push_back(phiCarryOutSummary);
163    }
164    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
165
166        assert (mCarryInfo->hasSummary());
167
168        // Check whether we need to resize the carry state
169        PHINode * index = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
170        mLoopIndicies.push_back(index);
171        index->addIncoming(iBuilder->getSize(0), entryBlock);
172
173        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
174        Value * capacity = iBuilder->CreateLoad(capacityPtr, false, "capacity");
175        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
176        Value * arrayPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
177        Value * array = iBuilder->CreateLoad(arrayPtr, false, "array");
178
179        BasicBlock * const entry = iBuilder->GetInsertBlock();
180        BasicBlock * const resizeCarryState = mKernel->CreateBasicBlock("ResizeCarryState");
181        BasicBlock * const reallocExisting = mKernel->CreateBasicBlock("ReallocExisting");
182        BasicBlock * const createNew = mKernel->CreateBasicBlock("CreateNew");
183        BasicBlock * const resumeKernel = mKernel->CreateBasicBlock("ResumeKernel");
184
185        iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(index, capacity), resumeKernel, resizeCarryState);
186
187        // RESIZE CARRY BLOCK
188        iBuilder->SetInsertPoint(resizeCarryState);
189        const auto BlockWidth = mCarryPackType->getPrimitiveSizeInBits() / 8;
190        const auto Log2BlockWidth = floor_log2(BlockWidth);
191        Constant * const carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(array->getType()->getPointerElementType()), iBuilder->getSizeTy(), false);
192        Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
193
194        Value * const hasCarryState = iBuilder->CreateICmpNE(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
195
196        iBuilder->CreateLikelyCondBr(hasCarryState, reallocExisting, createNew);
197
198        // REALLOCATE EXISTING
199        iBuilder->SetInsertPoint(reallocExisting);
200
201        Value * const capacitySize = iBuilder->CreateMul(capacity, carryStateWidth);
202        Value * const newCapacitySize = iBuilder->CreateShl(capacitySize, 1); // x 2
203
204
205        Value * newArray = iBuilder->CreateAlignedMalloc(newCapacitySize, iBuilder->getCacheAlignment());
206        iBuilder->CreateMemCpy(newArray, array, capacitySize, BlockWidth);
207        iBuilder->CreateMemZero(iBuilder->CreateGEP(newArray, capacitySize), capacitySize, BlockWidth);
208        iBuilder->CreateAlignedFree(array);
209        newArray = iBuilder->CreatePointerCast(newArray, array->getType());
210        iBuilder->CreateStore(newArray, arrayPtr);
211
212        Value * const log2capacity = iBuilder->CreateAdd(iBuilder->CreateCeilLog2(capacity), ONE);
213        Value * const summarySize = iBuilder->CreateShl(log2capacity, Log2BlockWidth + 1); // x 2(BlockWidth)
214        Value * const newLog2Capacity = iBuilder->CreateAdd(log2capacity, ONE);
215        Value * const newSummarySize = iBuilder->CreateShl(newLog2Capacity, Log2BlockWidth + 1); // x 2(BlockWidth)
216
217        Value * const summary = iBuilder->CreateLoad(summaryPtr, false);
218        Value * newSummary = iBuilder->CreateAlignedMalloc(newSummarySize, BlockWidth);
219        iBuilder->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
220        iBuilder->CreateMemZero(iBuilder->CreateGEP(newSummary, summarySize), iBuilder->getSize(2 * BlockWidth), BlockWidth);
221        iBuilder->CreateAlignedFree(summary);
222
223        Value * ptr1 = iBuilder->CreateGEP(newSummary, summarySize);
224        ptr1 = iBuilder->CreatePointerCast(ptr1, mCarryPackType->getPointerTo());
225
226        Value * ptr2 = iBuilder->CreateGEP(newSummary, iBuilder->CreateAdd(summarySize, iBuilder->getSize(BlockWidth)));
227        ptr2 = iBuilder->CreatePointerCast(ptr2, mCarryPackType->getPointerTo());
228
229        newSummary = iBuilder->CreatePointerCast(newSummary, mCarryPackType->getPointerTo());
230        iBuilder->CreateStore(newSummary, summaryPtr);
231        Value * const newCapacity = iBuilder->CreateShl(ONE, log2capacity);
232
233        iBuilder->CreateStore(newCapacity, capacityPtr);
234
235        iBuilder->CreateBr(resumeKernel);
236
237        // CREATE NEW
238        iBuilder->SetInsertPoint(createNew);
239
240        Constant * const initialLog2Capacity = iBuilder->getInt64(4);
241        Constant * const initialCapacity = ConstantExpr::getShl(ONE, initialLog2Capacity);
242        Constant * const initialCapacitySize = ConstantExpr::getMul(initialCapacity, carryStateWidth);
243
244        Value * initialArray = iBuilder->CreateAlignedMalloc(initialCapacitySize, iBuilder->getCacheAlignment());
245        iBuilder->CreateMemZero(initialArray, initialCapacitySize, BlockWidth);
246        initialArray = iBuilder->CreatePointerCast(initialArray, array->getType());
247        iBuilder->CreateStore(initialArray, arrayPtr);
248
249        Constant * initialSummarySize = ConstantExpr::getShl(ConstantExpr::getAdd(initialLog2Capacity, iBuilder->getInt64(1)), iBuilder->getInt64(Log2BlockWidth + 1));
250        Value * initialSummary = iBuilder->CreateAlignedMalloc(initialSummarySize, BlockWidth);
251        iBuilder->CreateMemZero(initialSummary, initialSummarySize, BlockWidth);
252        initialSummary = iBuilder->CreatePointerCast(initialSummary, mCarryPackType->getPointerTo());
253        iBuilder->CreateStore(initialSummary, summaryPtr);
254
255        iBuilder->CreateStore(initialCapacity, capacityPtr);
256
257        iBuilder->CreateBr(resumeKernel);
258
259        // RESUME KERNEL
260        iBuilder->SetInsertPoint(resumeKernel);
261        // Load the appropriate carry stat block
262        PHINode * phiArrayPtr = iBuilder->CreatePHI(array->getType(), 3);
263        phiArrayPtr->addIncoming(array, entry);
264        phiArrayPtr->addIncoming(initialArray, createNew);
265        phiArrayPtr->addIncoming(newArray, reallocExisting);
266
267        // NOTE: the 3 here is only to pass the assertion later. It refers to the number of elements in the carry data struct.
268        mCarryFrameStack.emplace_back(mCurrentFrame, 3);
269        mCurrentFrame = iBuilder->CreateGEP(phiArrayPtr, index);
270    }
271}
272
273/** ------------------------------------------------------------------------------------------------------------- *
274 * @brief leaveLoopBody
275 ** ------------------------------------------------------------------------------------------------------------- */
276void CarryManager::leaveLoopBody(BasicBlock * /* exitBlock */) {
277
278    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
279
280        assert (mCarryInfo->hasSummary());
281
282        ConstantInt * const summaryIndex = iBuilder->getInt32(mCarryInfo->hasExplicitSummary() ? mCurrentFrameIndex : (mCurrentFrameIndex - 1));
283
284        Value * const carryInAccumulator = readCarryInSummary(summaryIndex);
285        Value * const carryOutAccumulator = mCarrySummaryStack.back();
286
287        if (mCarryInfo->hasExplicitSummary()) {
288            writeCarryOutSummary(carryOutAccumulator, summaryIndex);
289        }
290
291        std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
292        mCarryFrameStack.pop_back();
293
294        // In non-carry-collapsing mode, we cannot rely on the fact that performing a single iteration of this
295        // loop will consume all of the incoming carries from the prior block. We need to subtract the carries
296        // consumed by this iteration from our carry summary state. To do so in parallel, we use the the half-
297        // subtractor circuit to do it in ceil log2 steps. Similarly, we compute our carry out summary state
298        // (for the subsequent block to subtract) using a half-adder circuit.
299
300        // NOTE: this requires that, for all loop iterations, i, and all block iterations, j, the carry in
301        // summary, CI_i,j, matches the carry out summary of the prior block iteration, CO_i,j - 1.
302        // Otherwise we may end up with an incorrect result or being trapped in an infinite loop.
303
304        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
305        Value * capacity = iBuilder->CreateLoad(capacityPtr, false);
306        Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
307        Value * summary = iBuilder->CreateLoad(summaryPtr, false);
308
309        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
310
311        Value * loopSelector = iBuilder->CreateZExt(mLoopSelector, capacity->getType());
312
313        BasicBlock * entry = iBuilder->GetInsertBlock();
314        BasicBlock * update = mKernel->CreateBasicBlock("UpdateNonCarryCollapsingSummary");
315        BasicBlock * resume = mKernel->CreateBasicBlock("ResumeAfterUpdatingNonCarryCollapsingSummary");
316
317        iBuilder->CreateBr(update);
318
319        iBuilder->SetInsertPoint(update);
320        PHINode * i = iBuilder->CreatePHI(capacity->getType(), 2);
321        i->addIncoming(ConstantInt::getNullValue(capacity->getType()), entry);
322        PHINode * const borrow = iBuilder->CreatePHI(carryInAccumulator->getType(), 2);
323        borrow->addIncoming(carryInAccumulator, entry);
324        PHINode * const carry = iBuilder->CreatePHI(carryOutAccumulator->getType(), 2);
325        carry->addIncoming(carryOutAccumulator, entry);
326        // OR the updated carry in summary later for the summaryTest
327        PHINode * const carryInSummary = iBuilder->CreatePHI(mCarryPackType, 2);
328        carryInSummary->addIncoming(Constant::getNullValue(mCarryPackType), entry);
329
330        // half subtractor
331        Value * const carryInOffset = iBuilder->CreateOr(iBuilder->CreateShl(i, 1), loopSelector);
332        Value * const carryInPtr = iBuilder->CreateGEP(summary, carryInOffset);
333        Value * const carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
334        Value * const nextCarryIn = iBuilder->CreateXor(carryIn, borrow);
335        Value * const nextSummary = iBuilder->CreateOr(carryInSummary, nextCarryIn);
336        iBuilder->CreateBlockAlignedStore(nextCarryIn, carryInPtr);
337        carryInSummary->addIncoming(nextSummary, update);
338        Value * finalBorrow = iBuilder->CreateAnd(iBuilder->CreateNot(carryIn), borrow);
339        borrow->addIncoming(finalBorrow, update);
340
341        // half adder
342        Value * const carryOutOffset = iBuilder->CreateXor(carryInOffset, ConstantInt::get(carryInOffset->getType(), 1));
343        Value * const carryOutPtr = iBuilder->CreateGEP(summary, carryOutOffset);
344        Value * const carryOut = iBuilder->CreateBlockAlignedLoad(carryOutPtr);
345        Value * const nextCarryOut = iBuilder->CreateXor(carryOut, carry);
346        iBuilder->CreateBlockAlignedStore(nextCarryOut, carryOutPtr);
347        Value * finalCarry = iBuilder->CreateAnd(carryOut, carry);
348        carry->addIncoming(finalCarry, update);
349
350        // loop condition
351        i->addIncoming(iBuilder->CreateAdd(i, ONE), update);
352        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(iBuilder->CreateShl(ONE, i), capacity), update, resume);
353
354        iBuilder->SetInsertPoint(resume);
355
356        IntegerType * ty = IntegerType::get(iBuilder->getContext(), mCarryPackType->getPrimitiveSizeInBits());
357        iBuilder->CreateAssert(iBuilder->CreateICmpEQ(iBuilder->CreateBitCast(finalBorrow, ty), ConstantInt::getNullValue(ty)), "finalBorrow != 0");
358        iBuilder->CreateAssert(iBuilder->CreateICmpEQ(iBuilder->CreateBitCast(finalCarry, ty), ConstantInt::getNullValue(ty)), "finalCarry != 0");
359
360        assert (!mLoopIndicies.empty());
361        PHINode * index = mLoopIndicies.back();
362        index->addIncoming(iBuilder->CreateAdd(index, iBuilder->getSize(1)), resume);
363        mLoopIndicies.pop_back();
364
365        mNextSummaryTest = nextSummary;
366    }
367    if (mCarryInfo->hasSummary()) {
368        const auto n = mCarrySummaryStack.size(); assert (n > 1);
369        Value * carryOut = mCarrySummaryStack.back();
370        mCarrySummaryStack.pop_back();
371        PHINode * phiCarryOut = cast<PHINode>(mCarrySummaryStack.back());
372        phiCarryOut->addIncoming(carryOut, iBuilder->GetInsertBlock());
373        // If we're returning to the base scope, reset our accumulated summary value.
374        if (n == 2) {
375            carryOut = Constant::getNullValue(mCarryPackType);
376        }
377        mCarrySummaryStack.back() = carryOut;
378    }
379}
380
381/** ------------------------------------------------------------------------------------------------------------- *
382 * @brief leaveLoopScope
383 ** ------------------------------------------------------------------------------------------------------------- */
384void CarryManager::leaveLoopScope(BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
385    assert (mLoopDepth > 0);
386    --mLoopDepth;
387    leaveScope();
388}
389
390/** ------------------------------------------------------------------------------------------------------------- *
391 * @brief enterIfScope
392 ** ------------------------------------------------------------------------------------------------------------- */
393void CarryManager::enterIfScope(const PabloBlock * const scope) {
394    ++mIfDepth;
395    enterScope(scope);
396    // We zero-initialized the nested summary value and later OR in the current summary into the escaping summary
397    // so that upon processing the subsequent block iteration, we branch into this If scope iff a carry out was
398    // generated by a statement within this If scope and not by a dominating statement in the outer scope.
399    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
400        assert (mCurrentFrameIndex == 0);
401        mNextSummaryTest = readCarryInSummary(iBuilder->getInt32(0));
402        if (mCarryInfo->hasExplicitSummary()) {
403            mCurrentFrameIndex = 1;
404        }
405    }
406    mCarrySummaryStack.push_back(Constant::getNullValue(mCarryPackType));
407}
408
409/** ------------------------------------------------------------------------------------------------------------- *
410 * @brief generateSummaryTest
411 ** ------------------------------------------------------------------------------------------------------------- */
412Value * CarryManager::generateSummaryTest(Value * condition) {
413    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
414        assert ("summary test was not generated" && mNextSummaryTest);
415        condition = iBuilder->simd_or(condition, mNextSummaryTest);
416        mNextSummaryTest = nullptr;
417    }
418    assert ("summary test was not consumed" && (mNextSummaryTest == nullptr));
419    return condition;
420}
421
422/** ------------------------------------------------------------------------------------------------------------- *
423 * @brief enterIfBody
424 ** ------------------------------------------------------------------------------------------------------------- */
425void CarryManager::enterIfBody(BasicBlock * const entryBlock) {
426    assert (entryBlock);
427}
428
429/** ------------------------------------------------------------------------------------------------------------- *
430 * @brief leaveIfBody
431 ** ------------------------------------------------------------------------------------------------------------- */
432void CarryManager::leaveIfBody(BasicBlock * const exitBlock) {
433    assert (exitBlock);
434    const auto n = mCarrySummaryStack.size();
435    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
436        writeCarryOutSummary(mCarrySummaryStack[n - 1], iBuilder->getInt32(0));
437    }
438    if (n > 2) {
439        mCarrySummaryStack[n - 1] = iBuilder->CreateOr(mCarrySummaryStack[n - 1], mCarrySummaryStack[n - 2], "summary");
440    }
441}
442
443/** ------------------------------------------------------------------------------------------------------------- *
444 * @brief leaveIfScope
445 ** ------------------------------------------------------------------------------------------------------------- */
446void CarryManager::leaveIfScope(BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
447    assert (mIfDepth > 0);
448    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
449        const auto n = mCarrySummaryStack.size(); assert (n > 0);
450        if (n > 2) {
451            // When leaving a nested If scope with a summary value, phi out the summary to ensure the
452            // appropriate summary is stored in the outer scope.
453            Value * nested = mCarrySummaryStack[n - 1];
454            Value * outer = mCarrySummaryStack[n - 2];
455            assert (nested->getType() == outer->getType());
456            PHINode * const phi = iBuilder->CreatePHI(nested->getType(), 2, "summary");
457            phi->addIncoming(outer, entryBlock);
458            phi->addIncoming(nested, exitBlock);
459            mCarrySummaryStack[n - 2] = phi;
460        }
461    }
462    --mIfDepth;
463    leaveScope();
464    mCarrySummaryStack.pop_back();
465}
466
467/** ------------------------------------------------------------------------------------------------------------ *
468 * @brief enterScope
469 ** ------------------------------------------------------------------------------------------------------------- */
470void CarryManager::enterScope(const PabloBlock * const scope) {
471    assert (scope);
472    // Store the state of the current frame and update the scope state
473    mCarryFrameStack.emplace_back(mCurrentFrame, mCurrentFrameIndex + 1);
474    mCurrentScope = scope;
475    mCarryScopeIndex.push_back(++mCarryScopes);
476    mCarryInfo = &mCarryMetadata[mCarryScopes];
477    // Check whether we're still within our struct bounds; if this fails, either the Pablo program changed during
478    // compilation or a memory corruption has occured.
479    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
480    mCurrentFrame = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
481    // Verify we're pointing to a carry frame struct
482    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
483    mCurrentFrameIndex = 0;
484}
485
486/** ------------------------------------------------------------------------------------------------------------- *
487 * @brief leaveScope
488 ** ------------------------------------------------------------------------------------------------------------- */
489void CarryManager::leaveScope() {
490
491    // Did we use all of the packs in this carry struct?
492    assert (mCurrentFrameIndex == mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
493    // Sanity test: are there remaining carry frames?
494    assert (!mCarryFrameStack.empty());
495
496    std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrameStack.back();
497
498    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
499
500    mCarryFrameStack.pop_back();
501    mCarryScopeIndex.pop_back();
502    assert (!mCarryScopeIndex.empty());
503    mCurrentScope = mCurrentScope->getPredecessor();
504    assert (mCurrentScope);
505    mCarryInfo = &mCarryMetadata[mCarryScopeIndex.back()];
506}
507
508/** ------------------------------------------------------------------------------------------------------------- *
509 * @brief addCarryInCarryOut
510 ** ------------------------------------------------------------------------------------------------------------- */
511Value * CarryManager::addCarryInCarryOut(const Statement * const operation, Value * const e1, Value * const e2) {
512    assert (operation && (isNonAdvanceCarryGeneratingStatement(operation)));
513    Value * const carryIn = getNextCarryIn();
514    Value * carryOut, * result;
515    std::tie(carryOut, result) = iBuilder->bitblock_add_with_carry(e1, e2, carryIn);
516    setNextCarryOut(carryOut);
517    assert (result->getType() == mBitBlockType);
518    return result;
519}
520
521/** ------------------------------------------------------------------------------------------------------------- *
522 * @brief advanceCarryInCarryOut
523 ** ------------------------------------------------------------------------------------------------------------- */
524Value * CarryManager::advanceCarryInCarryOut(const Advance * const advance, Value * const value) {
525    const auto shiftAmount = advance->getAmount();
526    if (LLVM_LIKELY(shiftAmount <= mBitBlockWidth)) {
527        Value * const carryIn = getNextCarryIn();
528        Value * carryOut, * result;
529        if (LLVM_UNLIKELY(shiftAmount == mBitBlockWidth)) {
530            result = carryIn;
531            carryOut = value;
532        } else {
533            std::tie(carryOut, result) = iBuilder->bitblock_advance(value, carryIn, shiftAmount);
534        }
535        setNextCarryOut(carryOut);
536        assert (result->getType() == mBitBlockType);
537        return result;
538    } else {
539        return longAdvanceCarryInCarryOut(value, shiftAmount);
540    }
541}
542
543/** ------------------------------------------------------------------------------------------------------------- *
544 * @brief longAdvanceCarryInCarryOut
545 ** ------------------------------------------------------------------------------------------------------------- */
546Value * CarryManager::longAdvanceCarryInCarryOut(Value * const value, const unsigned shiftAmount) {
547
548    assert (shiftAmount > mBitBlockWidth);
549    assert (mHasLongAdvance);
550
551    Type * const streamVectorTy = iBuilder->getIntNTy(mBitBlockWidth);
552    Value * buffer = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex++), iBuilder->getInt32(0)});
553
554    const unsigned blockShift = shiftAmount % mBitBlockWidth;
555    const unsigned entries = ceil_udiv(shiftAmount, mBitBlockWidth);
556
557    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
558        Value * const summaryPtr = iBuilder->CreateGEP(buffer, iBuilder->getInt32(0));
559        assert (summaryPtr->getType()->getPointerElementType() == mBitBlockType);
560        Value * carry = iBuilder->CreateZExtOrBitCast(iBuilder->bitblock_any(value), streamVectorTy);
561        const auto limit = ceil_udiv(shiftAmount, std::pow(mBitBlockWidth, 2));
562        assert (limit == summaryPtr->getType()->getPointerElementType()->getArrayNumElements());
563        for (unsigned i = 0;;++i) {
564            Value * ptr = iBuilder->CreateGEP(summaryPtr, iBuilder->getInt32(i));
565            Value * prior = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(ptr), streamVectorTy);
566            Value * stream = iBuilder->CreateOr(iBuilder->CreateShl(prior, 1), carry);
567            if (LLVM_LIKELY(i == limit)) {
568                stream = iBuilder->CreateAnd(stream, iBuilder->bitblock_mask_from(iBuilder->getInt32(entries % mBitBlockWidth)));
569                addToCarryOutSummary(stream);
570                iBuilder->CreateBlockAlignedStore(stream, ptr);               
571                buffer = iBuilder->CreateGEP(buffer, iBuilder->getInt32(1));
572                break;
573            }
574            addToCarryOutSummary(stream);
575            iBuilder->CreateBlockAlignedStore(stream, ptr);
576            carry = iBuilder->CreateLShr(prior, mBitBlockWidth - 1);
577        }
578    }
579    assert (buffer->getType()->getPointerElementType() == mBitBlockType);
580
581    // Create a mask to implement circular buffer indexing
582    Value * indexMask = iBuilder->getSize(nearest_pow2(entries) - 1);   
583    Value * blockIndex = mKernel->getScalarField("CarryBlockIndex");
584    Value * carryIndex0 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(entries));
585    Value * loadIndex0 = iBuilder->CreateAnd(carryIndex0, indexMask);
586    Value * storeIndex = iBuilder->CreateAnd(blockIndex, indexMask);
587    Value * carryIn = iBuilder->CreateBlockAlignedLoad(iBuilder->CreateGEP(buffer, loadIndex0));
588    assert (carryIn->getType() == mBitBlockType);
589
590    // If the long advance is an exact multiple of BitBlockWidth, we simply return the oldest
591    // block in the long advance carry data area. 
592    if (blockShift == 0) {
593        iBuilder->CreateBlockAlignedStore(value, iBuilder->CreateGEP(buffer, storeIndex));
594        return carryIn;
595    }
596    // Otherwise we need to combine data from the two oldest blocks.
597    Value * carryIndex1 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(entries - 1));
598    Value * loadIndex1 = iBuilder->CreateAnd(carryIndex1, indexMask);
599    Value * carry_block1 = iBuilder->CreateBlockAlignedLoad(iBuilder->CreateGEP(buffer, loadIndex1));
600    Value * block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamVectorTy), mBitBlockWidth - blockShift);
601    Value * block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(carry_block1, streamVectorTy), blockShift);
602    iBuilder->CreateBlockAlignedStore(value, iBuilder->CreateGEP(buffer, storeIndex));
603    return iBuilder->CreateBitCast(iBuilder->CreateOr(block1_shl, block0_shr), mBitBlockType);
604}
605
606/** ------------------------------------------------------------------------------------------------------------- *
607 * @brief getNextCarryIn
608 ** ------------------------------------------------------------------------------------------------------------- */
609Value * CarryManager::getNextCarryIn() {
610    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
611    Value * carryInPtr = nullptr;
612    if (mLoopDepth == 0) {
613        carryInPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
614    } else {
615        carryInPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mLoopSelector});
616    }
617    assert (carryInPtr->getType()->getPointerElementType() == mCarryPackType);
618    Value * const carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
619    if (mLoopDepth > 0) {
620        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(mCarryPackType), carryInPtr);
621    }
622    return carryIn;
623}
624
625/** ------------------------------------------------------------------------------------------------------------- *
626 * @brief setNextCarryOut
627 ** ------------------------------------------------------------------------------------------------------------- */
628void CarryManager::setNextCarryOut(Value * carryOut) {
629    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
630    carryOut = iBuilder->CreateBitCast(carryOut, mCarryPackType);
631    if (mCarryInfo->hasSummary()) {
632        addToCarryOutSummary(carryOut);
633    }
634    Value * carryOutPtr = nullptr;
635    if (mLoopDepth == 0) {
636        carryOutPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
637    } else {
638        carryOutPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mNextLoopSelector});
639        if (LLVM_LIKELY(!mCarryInfo->nonCarryCollapsingMode())) {
640            Value * accum = iBuilder->CreateBlockAlignedLoad(carryOutPtr);
641            carryOut = iBuilder->CreateOr(carryOut, accum);
642        }
643    }
644    ++mCurrentFrameIndex;
645    assert (carryOutPtr->getType()->getPointerElementType() == mCarryPackType);
646    iBuilder->CreateBlockAlignedStore(carryOut, carryOutPtr);
647}
648
649/** ------------------------------------------------------------------------------------------------------------- *
650 * @brief readCarryInSummary
651 ** ------------------------------------------------------------------------------------------------------------- */
652Value * CarryManager::readCarryInSummary(ConstantInt * index) const {
653    assert (mCarryInfo->hasSummary());
654    unsigned count = 2;
655    if (LLVM_UNLIKELY(mCarryInfo->hasBorrowedSummary())) {
656        Type * frameTy = mCurrentFrame->getType()->getPointerElementType();
657        count = 1;
658        while (frameTy->isStructTy()) {
659            ++count;
660            frameTy = frameTy->getStructElementType(0);
661        }
662    }
663    const unsigned length = (mLoopDepth == 0) ? count : (count + 1);
664    Value * indicies[length];
665    std::fill(indicies, indicies + count - 1, iBuilder->getInt32(0));
666    indicies[count - 1] = index;
667    if (mLoopDepth != 0) {
668        indicies[count] = mLoopSelector;
669    }
670    ArrayRef<Value *> ar(indicies, length);
671    Value * const ptr = iBuilder->CreateGEP(mCurrentFrame, ar);
672    Value * const summary = iBuilder->CreateBlockAlignedLoad(ptr);
673    if (mLoopDepth != 0 && mCarryInfo->hasExplicitSummary()) {
674        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(mCarryPackType), ptr);
675    }
676    return summary;
677}
678
679/** ------------------------------------------------------------------------------------------------------------- *
680 * @brief writeCarryOutSummary
681 ** ------------------------------------------------------------------------------------------------------------- */
682inline void CarryManager::writeCarryOutSummary(Value * const summary, ConstantInt * index) const {
683    Value * ptr = nullptr;
684    assert (mCarryInfo->hasExplicitSummary());
685    if (mLoopDepth > 0) {
686        ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index, mNextLoopSelector});
687    } else {
688        ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index});
689    }
690    iBuilder->CreateBlockAlignedStore(summary, ptr);
691}
692
693/** ------------------------------------------------------------------------------------------------------------- *
694 * @brief addToCarryOutSummary
695 ** ------------------------------------------------------------------------------------------------------------- */
696inline void CarryManager::addToCarryOutSummary(Value * const value) {
697    assert ("cannot add null summary value!" && value);   
698    assert ("summary stack is empty!" && !mCarrySummaryStack.empty());
699    assert (mCarryInfo->hasSummary());
700    mCarrySummaryStack.back() = iBuilder->CreateOr(value, mCarrySummaryStack.back());
701}
702
703/** ------------------------------------------------------------------------------------------------------------- *
704 * @brief enumerate
705 ** ------------------------------------------------------------------------------------------------------------- */
706unsigned CarryManager::getScopeCount(PabloBlock * const scope, unsigned index) {
707    for (Statement * stmt : *scope) {
708        if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
709            index = getScopeCount(cast<Branch>(stmt)->getBody(), index);
710        }
711    }
712    return index + 1;
713}
714
715/** ------------------------------------------------------------------------------------------------------------- *
716 * @brief hasIterationSpecificAssignment
717 ** ------------------------------------------------------------------------------------------------------------- */
718bool CarryManager::hasIterationSpecificAssignment(const PabloBlock * const scope) {
719    if (const While * const br = dyn_cast_or_null<While>(scope->getBranch())) {
720        for (const Var * var : br->getEscaped()) {
721            for (const PabloAST * user : var->users()) {
722                if (const Extract * e = dyn_cast<Extract>(user)) {
723                    if (LLVM_UNLIKELY(e->getIndex() == var)) {
724                        // If we assign this Var a value and read the value as the index parameter
725                        // of a nested Extract statement, then we cannot collapse the carries.
726                        const PabloBlock * parent = e->getParent();
727                        for (;;) {
728                            if (parent == scope) {
729                                return true;
730                            }
731                            parent = parent->getPredecessor();
732                            if (parent == nullptr) {
733                                break;
734                            }
735                        }
736                    }
737                }
738            }
739        }
740    }
741    return false;
742}
743
744/** ------------------------------------------------------------------------------------------------------------- *
745 * @brief analyse
746 ** ------------------------------------------------------------------------------------------------------------- */
747StructType * CarryManager::analyse(PabloBlock * const scope, const unsigned ifDepth, const unsigned loopDepth, const bool isNestedWithinNonCarryCollapsingLoop) {
748
749    assert (scope != mKernel->getEntryBlock() || mCarryScopes == 0);
750    assert (mCarryScopes < mCarryMetadata.size());
751    assert (mCarryPackType);
752
753    const unsigned carryScopeIndex = mCarryScopes++;
754    const bool nonCarryCollapsingMode = hasIterationSpecificAssignment(scope);
755    Type * const carryPackType = (loopDepth == 0) ? mCarryPackType : ArrayType::get(mCarryPackType, 2);
756    bool hasLongAdvances = false;
757    std::vector<Type *> state;
758
759    for (Statement * stmt : *scope) {
760        if (LLVM_UNLIKELY(isa<Advance>(stmt))) {
761            const auto amount = cast<Advance>(stmt)->getAmount();
762            Type * type = carryPackType;
763            if (LLVM_UNLIKELY(amount > mBitBlockWidth)) {
764                const auto blocks = ceil_udiv(amount, mBitBlockWidth); assert (blocks > 1);
765                type = ArrayType::get(mBitBlockType, nearest_pow2(blocks));
766                if (LLVM_UNLIKELY(ifDepth > 0)) {
767                    Type * carryType = ArrayType::get(mBitBlockType, ceil_udiv(amount, std::pow(mBitBlockWidth, 2)));
768                    type = StructType::get(carryType, type, nullptr);
769                    hasLongAdvances = true;                   
770                }
771                mHasLongAdvance = true;
772            }
773            state.push_back(type);
774        } else if (LLVM_UNLIKELY(isNonAdvanceCarryGeneratingStatement(stmt))) {
775            state.push_back(carryPackType);
776        } else if (LLVM_UNLIKELY(isa<If>(stmt))) {
777            state.push_back(analyse(cast<If>(stmt)->getBody(), ifDepth + 1, loopDepth, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
778        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
779            mHasLoop = true;
780            state.push_back(analyse(cast<While>(stmt)->getBody(), ifDepth, loopDepth + 1, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
781        }
782    }
783    // Build the carry state struct and add the summary pack if needed.
784    CarryData & cd = mCarryMetadata[carryScopeIndex];
785    StructType * carryState = nullptr;
786    CarryData::SummaryType summaryType = CarryData::NoSummary;
787    if (LLVM_UNLIKELY(state.empty())) {
788        carryState = StructType::get(iBuilder->getContext());
789    } else {
790        //if (ifDepth > 0 || (nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop)) {
791        if (dyn_cast_or_null<If>(scope->getBranch()) || nonCarryCollapsingMode || isNestedWithinNonCarryCollapsingLoop) {
792            if (LLVM_LIKELY(state.size() > 1 || hasLongAdvances)) {
793                summaryType = CarryData::ExplicitSummary;
794                // NOTE: summaries are stored differently depending whether we're entering an If or While branch. With an If branch, they
795                // preceed the carry state data and with a While loop they succeed it. This is to help cache prefectching performance.
796                state.insert(isa<If>(scope->getBranch()) ? state.begin() : state.end(), carryPackType);
797            } else {
798                summaryType = CarryData::ImplicitSummary;
799                if (state[0]->isStructTy()) {
800                    summaryType = CarryData::BorrowedSummary;
801                }
802            }           
803        }
804        carryState = StructType::get(iBuilder->getContext(), state);
805        // If we're in a loop and cannot use collapsing carry mode, convert the carry state struct into a capacity,
806        // carry state pointer, and summary pointer struct.
807        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
808            carryState = StructType::get(iBuilder->getSizeTy(), carryState->getPointerTo(), mCarryPackType->getPointerTo(), nullptr);
809        }
810        cd.setNonCollapsingCarryMode(nonCarryCollapsingMode);
811    }
812    cd.setSummaryType(summaryType);
813    return carryState;
814}
815
816/** ------------------------------------------------------------------------------------------------------------- *
817 * @brief constructor
818 ** ------------------------------------------------------------------------------------------------------------- */
819CarryManager::CarryManager(IDISA::IDISA_Builder * idb) noexcept
820: iBuilder(idb)
821, mKernel(nullptr)
822, mSelf(nullptr)
823, mBitBlockType(idb->getBitBlockType())
824, mBitBlockWidth(idb->getBitBlockWidth())
825, mCurrentFrame(nullptr)
826, mCurrentFrameIndex(0)
827, mCurrentScope(nullptr)
828, mCarryInfo(nullptr)
829, mCarryPackType(mBitBlockType)
830, mNextSummaryTest(nullptr)
831, mIfDepth(0)
832, mHasLongAdvance(false)
833, mHasLoop(false)
834, mLoopDepth(0)
835, mLoopSelector(nullptr)
836, mNextLoopSelector(nullptr)
837, mCarryScopes(0) {
838
839}
840
841}
Note: See TracBrowser for help on using the repository browser.