source: icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp @ 5361

Last change on this file since 5361 was 5361, checked in by nmedfort, 2 years ago

Work on non-carry collapsing mode.

File size: 42.2 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "carry_manager.h"
8#include <pablo/carry_data.h>
9#include <pablo/codegenstate.h>
10#include <llvm/IR/BasicBlock.h>
11#include <IR_Gen/idisa_builder.h>
12#include <llvm/IR/DerivedTypes.h>
13#include <pablo/branch.h>
14#include <pablo/pe_advance.h>
15#include <pablo/pe_scanthru.h>
16#include <pablo/pe_matchstar.h>
17#include <pablo/pe_var.h>
18#include <llvm/Support/raw_ostream.h>
19using namespace llvm;
20
21namespace pablo {
22
23inline static unsigned floor_log2(const unsigned v) {
24    assert ("log2(0) is undefined!" && v != 0);
25    return 31 - __builtin_clz(v);
26}
27
28inline static unsigned nearest_pow2(const uint32_t v) {
29    assert(v > 0 && v < (UINT32_MAX / 2));
30    return (v < 2) ? 1 : (1 << (32 - __builtin_clz(v - 1)));
31}
32
33inline static unsigned ceil_udiv(const unsigned x, const unsigned y) {
34    return (((x - 1) | (y - 1)) + 1) / y;
35}
36
37using TypeId = PabloAST::ClassTypeId;
38
39inline static bool isNonAdvanceCarryGeneratingStatement(const Statement * const stmt) {
40    switch (stmt->getClassTypeId()) {
41        case TypeId::ScanThru:
42        case TypeId::AdvanceThenScanThru:
43        case TypeId::ScanTo:
44        case TypeId::AdvanceThenScanTo:
45        case TypeId::MatchStar:
46            return true;
47        default:
48            return false;
49    }
50}
51
52/** ------------------------------------------------------------------------------------------------------------- *
53 * @brief initializeCarryData
54 ** ------------------------------------------------------------------------------------------------------------- */
55void CarryManager::initializeCarryData(PabloKernel * const kernel) {
56
57    // Each scope constructs its own CarryData struct, which will be added to the final "carries" struct
58    // that is added to the Kernel. The scope index will indicate which struct to access.
59
60    // A CarryData struct either contains an array of CarryPackBlocks or an integer indicating the capacity of
61    // the variable length CarryData struct and pointer. A variable length CarryData struct is required whenever
62    // the streams accessed by a loop could vary between iterations. When resizing a CarryData struct for a
63    // particular loop, the current loop struct and all nested structs need to be resized. This accommodates
64    // the fact every pablo While loop must be executed at least once.
65
66    // A nested loop may also contain a variable length CarryData struct
67
68    // To determine whether we require a variable length CarryData struct, we test the escaped variables of
69    // each loop branch to see whether they are used as the index parameter of a nested Extract statement.
70    // Any scope that requires variable length CarryData, requires that all nested branches have a unique
71    // set of carries for that iteration.
72
73    mKernel = kernel;
74
75    mCurrentScope = kernel->getEntryBlock();
76
77    mCarryScopes = 0;
78
79    mCarryMetadata.resize(getScopeCount(kernel->getEntryBlock()));
80
81    mKernel->addScalar(analyse(kernel->getEntryBlock()), "carries");
82
83    if (mHasLoop) {
84        mKernel->addScalar(iBuilder->getInt32Ty(), "selector");
85    }
86    if (mHasLongAdvance) {
87        mKernel->addScalar(iBuilder->getSizeTy(), "CarryBlockIndex");
88    }
89}
90
91/** ------------------------------------------------------------------------------------------------------------- *
92 * @brief initializeCodeGen
93 ** ------------------------------------------------------------------------------------------------------------- */
94void CarryManager::initializeCodeGen() {
95
96    assert(!mCarryMetadata.empty());
97    mCarryInfo = &mCarryMetadata[0];
98    assert (!mCarryInfo->hasSummary());
99
100    mCurrentFrame = mKernel->getScalarFieldPtr("carries");
101    mCurrentFrameIndex = 0;
102    mCarryScopes = 0;
103    mCarryScopeIndex.push_back(0);
104
105
106    assert (mCarryFrame.empty());
107
108    assert (mCarryInSummary.empty());
109    mCarryInSummary.push_back(Constant::getNullValue(mCarryPackType));
110
111    assert (mCarryOutSummary.empty());
112    mCarryOutSummary.push_back(Constant::getNullValue(mCarryPackType));
113
114    if (mHasLoop) {
115        mLoopSelector = mKernel->getScalarField("selector");
116    }
117}
118
119/** ------------------------------------------------------------------------------------------------------------- *
120 * @brief finalizeCodeGen
121 ** ------------------------------------------------------------------------------------------------------------- */
122void CarryManager::finalizeCodeGen() {
123    if (mHasLoop) {
124        mKernel->setScalarField("selector", iBuilder->CreateXor(mLoopSelector, iBuilder->getInt32(1)));
125    }
126    if (mHasLongAdvance) {
127        Value * idx = mKernel->getScalarField("CarryBlockIndex");
128        idx = iBuilder->CreateAdd(idx, iBuilder->getSize(1));
129        mKernel->setScalarField("CarryBlockIndex", idx);
130    }
131    assert (mCarryFrame.empty());
132
133    assert (mCarryInSummary.size() == 1);
134    mCarryInSummary.clear();
135
136    assert (mCarryOutSummary.size() == 1);
137    mCarryOutSummary.clear();
138
139    assert (mCarryScopeIndex.size() == 1);
140    mCarryScopeIndex.clear();
141}
142
143/** ------------------------------------------------------------------------------------------------------------- *
144 * @brief enterLoopScope
145 ** ------------------------------------------------------------------------------------------------------------- */
146void CarryManager::enterLoopScope(const PabloBlock * const scope) {
147    assert (scope);
148    ++mLoopDepth;
149    enterScope(scope);
150}
151
152/** ------------------------------------------------------------------------------------------------------------- *
153 * @brief enterLoopBody
154 ** ------------------------------------------------------------------------------------------------------------- */
155void CarryManager::enterLoopBody(BasicBlock * const entryBlock) {
156
157    assert (mHasLoop);
158
159    if (mCarryInfo->hasSummary()) {
160        PHINode * phiCarryOutSummary = iBuilder->CreatePHI(mCarryPackType, 2, "summary");
161        assert (!mCarryOutSummary.empty());
162        phiCarryOutSummary->addIncoming(mCarryOutSummary.back(), entryBlock);
163        // Replace the incoming carry summary with the phi node and add the phi node to the stack
164        // so that we can properly OR it into the outgoing summary value.
165        mCarryOutSummary.back() = phiCarryOutSummary;
166        Value * carryOut = phiCarryOutSummary;
167        // In non-carry-collapsing mode, the carry out summary of this block iteration *MUST* match the carry in of the
168        // subsequent block iteration. Otherwise the subsequent block iteration may become trapped in an infinite loop.
169        // To ensure this, we effectively "zero-initialize" the carry-out coming into this loop but OR in carry-out
170        // of the outer scope for the phi value the end of the loop body. This avoids us needing to maintain a carry-in
171        // summary for all outer scopes whenever only a nested scope requires this mode.
172        if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
173            carryOut = Constant::getNullValue(mCarryPackType);
174        }
175        mCarryOutSummary.push_back(carryOut);
176    }
177
178    if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
179
180        // Check whether we need to resize the carry state
181        PHINode * index = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
182        mLoopIndicies.push_back(index);
183        index->addIncoming(iBuilder->getSize(0), entryBlock);
184
185        mCarryInSummary.push_back(Constant::getNullValue(mCarryPackType));
186
187        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
188        Value * capacity = iBuilder->CreateLoad(capacityPtr, false, "capacity");
189        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
190        Value * arrayPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
191        Value * array = iBuilder->CreateLoad(arrayPtr, false, "array");
192
193        BasicBlock * const entry = iBuilder->GetInsertBlock();
194        BasicBlock * const resizeCarryState = mKernel->CreateBasicBlock("ResizeCarryState");
195        BasicBlock * const reallocExisting = mKernel->CreateBasicBlock("ReallocExisting");
196        BasicBlock * const createNew = mKernel->CreateBasicBlock("CreateNew");
197        BasicBlock * const resumeKernel = mKernel->CreateBasicBlock("ResumeKernel");
198
199        iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(index, capacity), resumeKernel, resizeCarryState);
200
201        // RESIZE CARRY BLOCK
202        iBuilder->SetInsertPoint(resizeCarryState);
203        const auto BlockWidth = mCarryPackType->getPrimitiveSizeInBits() / 8;
204        const auto Log2BlockWidth = floor_log2(BlockWidth);
205        Constant * const carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(array->getType()->getPointerElementType()), iBuilder->getSizeTy(), false);
206        Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
207
208        Value * const hasCarryState = iBuilder->CreateICmpNE(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
209
210        iBuilder->CreateLikelyCondBr(hasCarryState, reallocExisting, createNew);
211
212        // REALLOCATE EXISTING
213        iBuilder->SetInsertPoint(reallocExisting);
214
215        Value * const capacitySize = iBuilder->CreateMul(capacity, carryStateWidth);
216        Value * const newCapacitySize = iBuilder->CreateShl(capacitySize, 1); // x 2
217
218
219        Value * newArray = iBuilder->CreateAlignedMalloc(newCapacitySize, iBuilder->getCacheAlignment());
220        iBuilder->CreateMemCpy(newArray, array, capacitySize, BlockWidth);
221        iBuilder->CreateMemZero(iBuilder->CreateGEP(newArray, capacitySize), capacitySize, BlockWidth);
222        iBuilder->CreateAlignedFree(array);
223        newArray = iBuilder->CreatePointerCast(newArray, array->getType());
224        iBuilder->CreateStore(newArray, arrayPtr);
225
226        Value * const log2capacity = iBuilder->CreateAdd(iBuilder->CreateCeilLog2(capacity), ONE);
227        Value * const summarySize = iBuilder->CreateShl(log2capacity, Log2BlockWidth + 1); // x 2(BlockWidth)
228        Value * const newLog2Capacity = iBuilder->CreateAdd(log2capacity, ONE);
229        Value * const newSummarySize = iBuilder->CreateShl(newLog2Capacity, Log2BlockWidth + 1); // x 2(BlockWidth)
230
231        Value * const summary = iBuilder->CreateLoad(summaryPtr, false);
232        Value * newSummary = iBuilder->CreateAlignedMalloc(newSummarySize, BlockWidth);
233        iBuilder->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
234        iBuilder->CreateMemZero(iBuilder->CreateGEP(newSummary, summarySize), iBuilder->getSize(2 * BlockWidth), BlockWidth);
235        iBuilder->CreateAlignedFree(summary);
236
237        Value * ptr1 = iBuilder->CreateGEP(newSummary, summarySize);
238        ptr1 = iBuilder->CreatePointerCast(ptr1, mCarryPackType->getPointerTo());
239
240        Value * ptr2 = iBuilder->CreateGEP(newSummary, iBuilder->CreateAdd(summarySize, iBuilder->getSize(BlockWidth)));
241        ptr2 = iBuilder->CreatePointerCast(ptr2, mCarryPackType->getPointerTo());
242
243        newSummary = iBuilder->CreatePointerCast(newSummary, mCarryPackType->getPointerTo());
244        iBuilder->CreateStore(newSummary, summaryPtr);
245        Value * const newCapacity = iBuilder->CreateShl(ONE, log2capacity);
246
247        iBuilder->CreateStore(newCapacity, capacityPtr);
248
249        iBuilder->CreateBr(resumeKernel);
250
251        // CREATE NEW
252        iBuilder->SetInsertPoint(createNew);
253
254        Constant * const initialLog2Capacity = iBuilder->getInt64(4);
255        Constant * const initialCapacity = ConstantExpr::getShl(ONE, initialLog2Capacity);
256        Constant * const initialCapacitySize = ConstantExpr::getMul(initialCapacity, carryStateWidth);
257
258        Value * initialArray = iBuilder->CreateAlignedMalloc(initialCapacitySize, iBuilder->getCacheAlignment());
259        iBuilder->CreateMemZero(initialArray, initialCapacitySize, BlockWidth);
260        initialArray = iBuilder->CreatePointerCast(initialArray, array->getType());
261        iBuilder->CreateStore(initialArray, arrayPtr);
262
263        Constant * initialSummarySize = ConstantExpr::getShl(ConstantExpr::getAdd(initialLog2Capacity, iBuilder->getInt64(1)), iBuilder->getInt64(Log2BlockWidth + 1));
264        Value * initialSummary = iBuilder->CreateAlignedMalloc(initialSummarySize, BlockWidth);
265        iBuilder->CreateMemZero(initialSummary, initialSummarySize, BlockWidth);
266        initialSummary = iBuilder->CreatePointerCast(initialSummary, mCarryPackType->getPointerTo());
267        iBuilder->CreateStore(initialSummary, summaryPtr);
268
269        iBuilder->CreateStore(initialCapacity, capacityPtr);
270
271        iBuilder->CreateBr(resumeKernel);
272
273        // RESUME KERNEL
274        iBuilder->SetInsertPoint(resumeKernel);
275        // Load the appropriate carry stat block
276        PHINode * phiArrayPtr = iBuilder->CreatePHI(array->getType(), 3);
277        phiArrayPtr->addIncoming(array, entry);
278        phiArrayPtr->addIncoming(initialArray, createNew);
279        phiArrayPtr->addIncoming(newArray, reallocExisting);
280
281        // note: the 3 here is only to pass the assertion later. It refers to the number of elements in the carry data struct.
282        mCarryFrame.emplace_back(mCurrentFrame, 3);
283        mCurrentFrame = iBuilder->CreateGEP(phiArrayPtr, index);
284    }
285}
286
287/** ------------------------------------------------------------------------------------------------------------- *
288 * @brief leaveLoopBody
289 ** ------------------------------------------------------------------------------------------------------------- */
290void CarryManager::leaveLoopBody(BasicBlock * /* exitBlock */) {
291
292    if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
293
294        std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrame.back();
295        mCarryFrame.pop_back();
296        assert (!mCarryInSummary.empty());
297        Value * carryInAccumulator = mCarryInSummary.back();
298        Value * carryOutAccumulator = mCarryOutSummary.back();
299
300        // In non-carry-collapsing mode, we cannot rely on the fact that performing a single iteration of this
301        // loop will consume all of the incoming carries from the prior block. We need to subtract the carries
302        // consumed by this iteration from our carry summary state. To do so in parallel, we use the the half-
303        // subtractor circuit to do it in ceil log2 steps. Similarly, we compute our carry out summary state
304        // (for the subsequent block to subtract) using a half-adder circuit.
305
306        // NOTE: this requires that, for all loop iterations, i, and all block iterations, j, the carry in
307        // summary, CI_i,j, matches the carry out summary of the prior block iteration, CO_i,j - 1.
308        // Otherwise we may end up with an incorrect result or being trapped in an infinite loop.
309
310        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
311        Value * capacity = iBuilder->CreateLoad(capacityPtr, false);
312        Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
313        Value * summary = iBuilder->CreateLoad(summaryPtr, false);
314
315        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
316
317        Value * loopSelector = iBuilder->CreateZExt(mLoopSelector, capacity->getType());
318
319        BasicBlock * entry = iBuilder->GetInsertBlock();
320        BasicBlock * update = mKernel->CreateBasicBlock("UpdateNonCarryCollapsingSummary");
321        BasicBlock * resume = mKernel->CreateBasicBlock("ResumeAfterUpdatingNonCarryCollapsingSummary");
322
323        iBuilder->CreateBr(update);
324
325        iBuilder->SetInsertPoint(update);
326        PHINode * i = iBuilder->CreatePHI(capacity->getType(), 2);
327        i->addIncoming(ConstantInt::getNullValue(capacity->getType()), entry);
328        PHINode * const borrow = iBuilder->CreatePHI(carryInAccumulator->getType(), 2);
329        borrow->addIncoming(carryInAccumulator, entry);
330        PHINode * const carry = iBuilder->CreatePHI(carryOutAccumulator->getType(), 2);
331        carry->addIncoming(carryOutAccumulator, entry);
332        // OR the updated carry in summary later for the summaryTest
333        PHINode * const carryInSummary = iBuilder->CreatePHI(mCarryPackType, 2);
334        carryInSummary->addIncoming(Constant::getNullValue(mCarryPackType), entry);
335
336        // half subtractor
337        Value * const carryInOffset = iBuilder->CreateOr(iBuilder->CreateShl(i, 1), loopSelector);
338        Value * const carryInPtr = iBuilder->CreateGEP(summary, carryInOffset);
339        Value * const carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
340        Value * const carryInPrime = iBuilder->CreateXor(carryIn, borrow);
341        Value * const finalCarryInSummary = iBuilder->CreateOr(carryInSummary, carryInPrime);
342        iBuilder->CreateBlockAlignedStore(carryInPrime, carryInPtr);
343        carryInSummary->addIncoming(finalCarryInSummary, update);
344        Value * finalBorrow = iBuilder->CreateAnd(iBuilder->CreateNot(carryIn), borrow);
345        borrow->addIncoming(finalBorrow, update);
346
347        // half adder
348        Value * const carryOutOffset = iBuilder->CreateXor(carryInOffset, ConstantInt::get(carryInOffset->getType(), 1));
349        Value * const carryOutPtr = iBuilder->CreateGEP(summary, carryOutOffset);
350        Value * const carryOut = iBuilder->CreateBlockAlignedLoad(carryOutPtr);
351        Value * const carryOutPrime = iBuilder->CreateXor(carryOut, carry);
352        iBuilder->CreateBlockAlignedStore(carryOutPrime, carryOutPtr);
353        Value * finalCarry = iBuilder->CreateAnd(carryOut, carry);
354        carry->addIncoming(finalCarry, update);
355
356        // loop condition
357        Value * n = iBuilder->CreateAdd(i, ONE);
358        i->addIncoming(n, update);
359        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(iBuilder->CreateShl(ONE, i), capacity), update, resume);
360
361        iBuilder->SetInsertPoint(resume);
362
363        IntegerType * ty = IntegerType::get(iBuilder->getContext(), iBuilder->getBitBlockWidth());
364        iBuilder->CreateAssert(iBuilder->CreateICmpEQ(iBuilder->CreateBitCast(finalBorrow, ty), Constant::getNullValue(ty)), "borrow != 0");
365        iBuilder->CreateAssert(iBuilder->CreateICmpEQ(iBuilder->CreateBitCast(finalCarry, ty), Constant::getNullValue(ty)), "carry != 0");
366
367        assert (!mLoopIndicies.empty());
368        PHINode * index = mLoopIndicies.back();
369        index->addIncoming(iBuilder->CreateAdd(index, iBuilder->getSize(1)), resume);
370        mLoopIndicies.pop_back();
371        mCarryInSummary.back() = finalCarryInSummary;
372    }
373    if (mCarryInfo->hasSummary()) {
374        const auto n = mCarryOutSummary.size(); assert (n > 1);
375        Value * carryOut = mCarryOutSummary.back();
376        mCarryOutSummary.pop_back();
377        PHINode * phiCarryOut = cast<PHINode>(mCarryOutSummary.back());
378        if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
379            carryOut = iBuilder->CreateOr(phiCarryOut, carryOut);
380        }
381        phiCarryOut->addIncoming(carryOut, iBuilder->GetInsertBlock());
382    }
383}
384
385/** ------------------------------------------------------------------------------------------------------------- *
386 * @brief leaveLoopScope
387 ** ------------------------------------------------------------------------------------------------------------- */
388void CarryManager::leaveLoopScope(BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
389    assert (mLoopDepth > 0);
390    --mLoopDepth;
391    leaveScope();
392}
393
394/** ------------------------------------------------------------------------------------------------------------- *
395 * @brief enterIfScope
396 ** ------------------------------------------------------------------------------------------------------------- */
397void CarryManager::enterIfScope(const PabloBlock * const scope) {
398    ++mIfDepth;
399    enterScope(scope);
400    mCarryOutSummary.push_back(Constant::getNullValue(mCarryPackType));
401}
402
403/** ------------------------------------------------------------------------------------------------------------- *
404 * @brief generateSummaryTest
405 ** ------------------------------------------------------------------------------------------------------------- */
406Value * CarryManager::generateSummaryTest(Value * condition) {
407    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
408        Value * summary = nullptr;
409        if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
410            summary = mCarryInSummary.back();
411            mCarryInSummary.pop_back();
412        } else {
413            // enter the (potentially nested) struct and extract the summary element (always element 0)
414            unsigned count = 2;
415            if (LLVM_UNLIKELY(mCarryInfo->hasBorrowedSummary())) {
416                Type * frameTy = mCurrentFrame->getType()->getPointerElementType();
417                count = 1;
418                while (frameTy->isStructTy()) {
419                    ++count;
420                    frameTy = frameTy->getStructElementType(0);
421                }
422            }
423            const bool useLoopSelector = mCarryInfo->hasImplicitSummary() && mLoopDepth > 0;
424            const auto length = count + (useLoopSelector ? 1 : 0);
425            Value * indicies[length];
426            std::fill(indicies, indicies + count, iBuilder->getInt32(0));
427            if (LLVM_UNLIKELY(useLoopSelector)) {
428                indicies[count] = mLoopSelector;
429            }
430            ArrayRef<Value *> ar(indicies, length);
431            Value * ptr = iBuilder->CreateGEP(mCurrentFrame, ar);
432            // Sanity check: make sure we're accessing a summary value
433            assert (ptr->getType()->getPointerElementType()->canLosslesslyBitCastTo(condition->getType()));
434            summary = iBuilder->CreateBlockAlignedLoad(ptr);
435        }
436        condition = iBuilder->simd_or(condition, summary);
437    }
438    return condition;
439}
440
441/** ------------------------------------------------------------------------------------------------------------- *
442 * @brief enterIfBody
443 ** ------------------------------------------------------------------------------------------------------------- */
444void CarryManager::enterIfBody(BasicBlock * const entryBlock) { assert (entryBlock);
445
446}
447
448/** ------------------------------------------------------------------------------------------------------------- *
449 * @brief leaveIfBody
450 ** ------------------------------------------------------------------------------------------------------------- */
451void CarryManager::leaveIfBody(BasicBlock * const exitBlock) { assert (exitBlock);
452    const auto n = mCarryOutSummary.size();
453    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
454        assert (!mCarryOutSummary.empty());
455        Value * ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
456        Value * const value = iBuilder->CreateBitCast(mCarryOutSummary.back(), mBitBlockType);
457        iBuilder->CreateBlockAlignedStore(value, ptr);
458    }
459    if (n > 1) {
460        mCarryOutSummary[n - 1] = iBuilder->CreateOr(mCarryOutSummary[n - 1], mCarryOutSummary[n - 2], "summary");
461    }
462}
463
464/** ------------------------------------------------------------------------------------------------------------- *
465 * @brief leaveIfScope
466 ** ------------------------------------------------------------------------------------------------------------- */
467void CarryManager::leaveIfScope(BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
468    assert (mIfDepth > 0);
469    if (mCarryInfo->hasSummary()) {
470        const auto n = mCarryOutSummary.size(); assert (n > 0);
471        if (n > 1) {
472            // When leaving a nested If scope with a summary value, phi out the summary to ensure the
473            // appropriate summary is stored in the outer scope.
474            Value * nested = mCarryOutSummary[n - 1];
475            Value * outer = mCarryOutSummary[n - 2];
476            if (LLVM_LIKELY(nested != outer)) {
477                assert (nested->getType() == outer->getType());
478                PHINode * const phi = iBuilder->CreatePHI(nested->getType(), 2, "summary");
479                phi->addIncoming(outer, entryBlock);
480                phi->addIncoming(nested, exitBlock);
481                mCarryOutSummary[n - 2] = phi;
482            }
483        }       
484    }
485    --mIfDepth;
486    leaveScope();
487    mCarryOutSummary.pop_back();
488}
489
490/** ------------------------------------------------------------------------------------------------------------ *
491 * @brief enterScope
492 ** ------------------------------------------------------------------------------------------------------------- */
493void CarryManager::enterScope(const PabloBlock * const scope) {
494    assert (scope);
495    // Store the state of the current frame and update the scope state
496    mCarryFrame.emplace_back(mCurrentFrame, mCurrentFrameIndex + 1);
497    mCurrentScope = scope;
498    mCarryScopeIndex.push_back(++mCarryScopes);
499    mCarryInfo = &mCarryMetadata[mCarryScopes];
500    // Check whether we're still within our struct bounds; if this fails, either the Pablo program changed during
501    // compilation or a memory corruption has occured.
502    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
503    mCurrentFrame = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
504    // Verify we're pointing to a carry frame struct
505    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
506    // We always use the 0-th slot for the summary value, even when it's implicit
507    mCurrentFrameIndex = mCarryInfo->hasExplicitSummary() ? 1 : 0;
508}
509
510/** ------------------------------------------------------------------------------------------------------------- *
511 * @brief leaveScope
512 ** ------------------------------------------------------------------------------------------------------------- */
513void CarryManager::leaveScope() {
514
515    // Did we use all of the packs in this carry struct?
516    assert (mCurrentFrameIndex == mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
517    // Sanity test: are there remaining carry frames?
518    assert (!mCarryFrame.empty());
519
520    std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrame.back();
521
522    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
523
524    mCarryFrame.pop_back();
525    mCarryScopeIndex.pop_back();
526    assert (!mCarryScopeIndex.empty());
527    mCurrentScope = mCurrentScope->getPredecessor();
528    assert (mCurrentScope);
529    mCarryInfo = &mCarryMetadata[mCarryScopeIndex.back()];
530}
531
532/** ------------------------------------------------------------------------------------------------------------- *
533 * @brief addCarryInCarryOut
534 ** ------------------------------------------------------------------------------------------------------------- */
535Value * CarryManager::addCarryInCarryOut(const Statement * const operation, Value * const e1, Value * const e2) {
536    assert (operation && (isNonAdvanceCarryGeneratingStatement(operation)));
537    Value * const carryIn = getNextCarryIn();
538    Value * carryOut, * result;
539    std::tie(carryOut, result) = iBuilder->bitblock_add_with_carry(e1, e2, carryIn);
540    setNextCarryOut(carryOut);
541    assert (result->getType() == mBitBlockType);
542    return result;
543}
544
545/** ------------------------------------------------------------------------------------------------------------- *
546 * @brief advanceCarryInCarryOut
547 ** ------------------------------------------------------------------------------------------------------------- */
548Value * CarryManager::advanceCarryInCarryOut(const Advance * const advance, Value * const value) {
549    const auto shiftAmount = advance->getAmount();
550    if (LLVM_LIKELY(shiftAmount <= mBitBlockWidth)) {
551        Value * const carryIn = getNextCarryIn();
552        Value * carryOut, * result;
553        if (LLVM_UNLIKELY(shiftAmount == mBitBlockWidth)) {
554            result = carryIn;
555            carryOut = value;
556        } else {
557            std::tie(carryOut, result) = iBuilder->bitblock_advance(value, carryIn, shiftAmount);
558        }
559        setNextCarryOut(carryOut);
560        assert (result->getType() == mBitBlockType);
561        return result;
562    } else {
563        return longAdvanceCarryInCarryOut(shiftAmount, value);
564    }
565}
566
567/** ------------------------------------------------------------------------------------------------------------- *
568 * @brief longAdvanceCarryInCarryOut
569 ** ------------------------------------------------------------------------------------------------------------- */
570Value * CarryManager::longAdvanceCarryInCarryOut(const unsigned shiftAmount, Value * value) {
571
572    assert (shiftAmount > mBitBlockWidth);
573    assert (mHasLongAdvance);
574
575    Type * const streamVectorTy = iBuilder->getIntNTy(mBitBlockWidth);
576    value = iBuilder->CreateBitCast(value, mBitBlockType);
577    Value * buffer = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex++), iBuilder->getInt32(0)});
578
579    const unsigned blockShift = shiftAmount % mBitBlockWidth;
580    const unsigned entries = ceil_udiv(shiftAmount, mBitBlockWidth);
581
582    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
583        Value * const summaryPtr = iBuilder->CreateGEP(buffer, iBuilder->getInt32(0));
584        assert (summaryPtr->getType()->getPointerElementType() == mBitBlockType);
585        Value * carry = iBuilder->CreateZExtOrBitCast(iBuilder->bitblock_any(value), streamVectorTy);
586        const auto limit = ceil_udiv(shiftAmount, std::pow(mBitBlockWidth, 2));
587        assert (limit == summaryPtr->getType()->getPointerElementType()->getArrayNumElements());
588        for (unsigned i = 0;;++i) {
589            Value * ptr = iBuilder->CreateGEP(summaryPtr, iBuilder->getInt32(i));
590            Value * prior = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(ptr), streamVectorTy);
591            Value * stream = iBuilder->CreateOr(iBuilder->CreateShl(prior, 1), carry);
592            if (LLVM_LIKELY(i == limit)) {
593                stream = iBuilder->CreateAnd(stream, iBuilder->bitblock_mask_from(iBuilder->getInt32(entries % mBitBlockWidth)));
594                addToSummary(stream);
595                iBuilder->CreateBlockAlignedStore(stream, ptr);               
596                buffer = iBuilder->CreateGEP(buffer, iBuilder->getInt32(1));
597                break;
598            }
599            addToSummary(stream);
600            iBuilder->CreateBlockAlignedStore(stream, ptr);
601            carry = iBuilder->CreateLShr(prior, mBitBlockWidth - 1);
602        }
603    }
604    assert (buffer->getType()->getPointerElementType() == mBitBlockType);
605
606    // Create a mask to implement circular buffer indexing
607    Value * indexMask = iBuilder->getSize(nearest_pow2(entries) - 1);   
608    Value * blockIndex = mKernel->getScalarField("CarryBlockIndex");
609    Value * carryIndex0 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(entries));
610    Value * loadIndex0 = iBuilder->CreateAnd(carryIndex0, indexMask);
611    Value * storeIndex = iBuilder->CreateAnd(blockIndex, indexMask);
612    Value * carryIn = iBuilder->CreateBlockAlignedLoad(iBuilder->CreateGEP(buffer, loadIndex0));
613    assert (carryIn->getType() == mBitBlockType);
614    // in non-carry collapsing mode, we need to accumulate the carry in value in order to properly subtract it from the
615    // carry in state in order to deduce whether we still have pending iterations even if the loop condition fails.
616    if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
617        mCarryInSummary.back() = iBuilder->CreateOr(mCarryInSummary.back(), carryIn);
618    }
619    // If the long advance is an exact multiple of mBitBlockWidth, we simply return the oldest
620    // block in the long advance carry data area. 
621    if (blockShift == 0) {
622        iBuilder->CreateBlockAlignedStore(value, iBuilder->CreateGEP(buffer, storeIndex));
623        return carryIn;
624    }
625    // Otherwise we need to combine data from the two oldest blocks.
626    Value * carryIndex1 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(entries - 1));
627    Value * loadIndex1 = iBuilder->CreateAnd(carryIndex1, indexMask);
628    Value * carry_block1 = iBuilder->CreateBlockAlignedLoad(iBuilder->CreateGEP(buffer, loadIndex1));
629    Value * block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamVectorTy), mBitBlockWidth - blockShift);
630    Value * block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(carry_block1, streamVectorTy), blockShift);
631    iBuilder->CreateBlockAlignedStore(value, iBuilder->CreateGEP(buffer, storeIndex));
632    return iBuilder->CreateBitCast(iBuilder->CreateOr(block1_shl, block0_shr), mBitBlockType);
633}
634
635/** ------------------------------------------------------------------------------------------------------------- *
636 * @brief getNextCarryIn
637 ** ------------------------------------------------------------------------------------------------------------- */
638Value * CarryManager::getNextCarryIn() {
639    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
640    Value * carryInPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex++)});
641    mCarryPackPtr = carryInPtr;
642    if (mLoopDepth > 0) {
643        carryInPtr = iBuilder->CreateGEP(carryInPtr, {iBuilder->getInt32(0), mLoopSelector});       
644    }   
645    assert (carryInPtr->getType()->getPointerElementType() == mCarryPackType);
646    Value * const carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
647    // in non-carry collapsing mode, we need to accumulate the carry in value in order to properly subtract it from the
648    // carry in state in order to deduce whether we still have pending iterations even if the loop condition fails.
649    if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
650        mCarryInSummary.back() = iBuilder->CreateOr(mCarryInSummary.back(), carryIn);
651    }
652    if (mLoopDepth > 0) {
653        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(mCarryPackType), carryInPtr);
654    }
655    return carryIn;
656}
657
658/** ------------------------------------------------------------------------------------------------------------- *
659 * @brief setNextCarryOut
660 ** ------------------------------------------------------------------------------------------------------------- */
661void CarryManager::setNextCarryOut(Value * carryOut) {
662    if (mCarryInfo->hasExplicitSummary() || mCarryInfo->hasCountingSummary()) {
663        addToSummary(carryOut);
664    }
665    Value * carryOutPtr = mCarryPackPtr;
666    if (mLoopDepth > 0) {
667        Value * selector = iBuilder->CreateXor(mLoopSelector, ConstantInt::get(mLoopSelector->getType(), 1));
668        carryOutPtr = iBuilder->CreateGEP(mCarryPackPtr, {iBuilder->getInt32(0), selector});
669    }
670    carryOut = iBuilder->CreateBitCast(carryOut, mCarryPackType);
671    if (inCollapsingCarryMode()) {
672        Value * accum = iBuilder->CreateBlockAlignedLoad(carryOutPtr);
673        carryOut = iBuilder->CreateOr(carryOut, accum);
674    }
675    assert (carryOutPtr->getType()->getPointerElementType() == mCarryPackType);
676    iBuilder->CreateBlockAlignedStore(carryOut, carryOutPtr);
677}
678
679/** ------------------------------------------------------------------------------------------------------------- *
680 * @brief addToSummary
681 ** ------------------------------------------------------------------------------------------------------------- */
682void CarryManager::addToSummary(Value * value) { assert (value);
683    assert (!mCarryOutSummary.empty());
684    Value * const summary = mCarryOutSummary.back(); assert (summary);
685    if (LLVM_UNLIKELY(summary == value)) {
686        return;  //Nothing to add.
687    }
688    value = iBuilder->CreateBitCast(value, mCarryPackType);
689    if (LLVM_UNLIKELY(isa<Constant>(value))) {
690        if (LLVM_UNLIKELY(cast<Constant>(value)->isZeroValue())) {
691            return;
692        } else if (LLVM_UNLIKELY(cast<Constant>(value)->isAllOnesValue())) {
693            mCarryOutSummary.back() = value;
694            return;
695        }
696    } else if (LLVM_UNLIKELY(isa<Constant>(summary))) {
697        if (LLVM_UNLIKELY(cast<Constant>(summary)->isZeroValue())) {
698            mCarryOutSummary.back() = value;
699            return;
700        } else if (LLVM_UNLIKELY(cast<Constant>(summary)->isAllOnesValue())) {
701            return;
702        }
703    }   
704    mCarryOutSummary.back() = iBuilder->CreateOr(summary, value);
705}
706
707/** ------------------------------------------------------------------------------------------------------------- *
708 * @brief collapsingCarryMode
709 ** ------------------------------------------------------------------------------------------------------------- */
710bool CarryManager::inCollapsingCarryMode() const {
711    return (mCurrentScope->getBranch() && isa<While>(mCurrentScope->getBranch()) && !mCarryInfo->hasCountingSummary());
712}
713
714/** ------------------------------------------------------------------------------------------------------------- *
715 * @brief enumerate
716 ** ------------------------------------------------------------------------------------------------------------- */
717unsigned CarryManager::getScopeCount(PabloBlock * const scope, unsigned index) {
718    for (Statement * stmt : *scope) {
719        if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
720            index = getScopeCount(cast<Branch>(stmt)->getBody(), index);
721        }
722    }
723    return index + 1;
724}
725
726/** ------------------------------------------------------------------------------------------------------------- *
727 * @brief hasIterationSpecificAssignment
728 ** ------------------------------------------------------------------------------------------------------------- */
729bool CarryManager::hasIterationSpecificAssignment(const PabloBlock * const scope) {
730    if (const Branch * const br = scope->getBranch()) {
731        for (const Var * var : br->getEscaped()) {
732            for (const PabloAST * user : var->users()) {
733                if (const Extract * e = dyn_cast<Extract>(user)) {
734                    if (LLVM_UNLIKELY(e->getIndex() == var)) {
735                        // If we assign this Var a value and read the value as the index parameter
736                        // of a nested Extract statement, then we cannot collapse the carries.
737                        const PabloBlock * parent = e->getParent();
738                        for (;;) {
739                            if (parent == scope) {
740                                return true;
741                            }
742                            parent = parent->getPredecessor();
743                            if (parent == nullptr) {
744                                break;
745                            }
746                        }
747                    }
748                }
749            }
750        }
751    }
752    return false;
753}
754
755/** ------------------------------------------------------------------------------------------------------------- *
756 * @brief analyse
757 ** ------------------------------------------------------------------------------------------------------------- */
758StructType * CarryManager::analyse(PabloBlock * const scope, const unsigned ifDepth, const unsigned loopDepth) {
759
760    assert (scope != mKernel->getEntryBlock() || mCarryScopes == 0);
761    assert (mCarryScopes < mCarryMetadata.size());
762    assert (mCarryPackType);
763
764    CarryData & cd = mCarryMetadata[mCarryScopes++];
765
766    std::vector<Type *> state;
767
768    Type * const carryPackType = (loopDepth == 0) ? mCarryPackType : ArrayType::get(mCarryPackType, 2);
769
770    bool hasLongAdvances = false;
771    for (Statement * stmt : *scope) {
772        if (LLVM_UNLIKELY(isa<Advance>(stmt))) {
773            const auto amount = cast<Advance>(stmt)->getAmount();
774            if (LLVM_LIKELY(amount <= mBitBlockWidth)) {
775                state.push_back(carryPackType);
776            } else {
777                const auto blocks = ceil_udiv(amount, mBitBlockWidth); assert (blocks > 1);
778                Type * type = ArrayType::get(mBitBlockType, nearest_pow2(blocks));
779                if (LLVM_UNLIKELY(ifDepth > 0)) {
780                    Type * carryType = ArrayType::get(mBitBlockType, ceil_udiv(amount, std::pow(mBitBlockWidth, 2)));
781                    type = StructType::get(carryType, type, nullptr);
782                    hasLongAdvances = true;                   
783                }
784                mHasLongAdvance = true;
785                state.push_back(type);
786            }
787        } else if (LLVM_UNLIKELY(isNonAdvanceCarryGeneratingStatement(stmt))) {
788            state.push_back(carryPackType);
789        } else if (LLVM_UNLIKELY(isa<If>(stmt))) {
790            state.push_back(analyse(cast<If>(stmt)->getBody(), ifDepth + 1, loopDepth));
791        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
792            mHasLoop = true;
793            state.push_back(analyse(cast<While>(stmt)->getBody(), ifDepth, loopDepth + 1));
794        }
795    }
796
797    StructType * carryState = nullptr;
798    // Add the summary pack if needed.
799    CarryData::SummaryType summaryType = CarryData::NoSummary;
800    if (LLVM_UNLIKELY(state.empty())) {
801        carryState = StructType::get(iBuilder->getContext());
802    } else {
803        const bool nonCarryCollapsingMode = loopDepth > 0 && hasIterationSpecificAssignment(scope);
804        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
805            summaryType = CarryData::CountingSummary;
806        } else if (ifDepth > 0) {
807            // A non-collapsing loop requires a unique summary for each iteration. Thus whenever we have a non-collapsing While
808            // within an If scope with an implicit summary, the If scope requires an explicit summary.
809            if (isa<If>(scope->getBranch())) {
810                if (LLVM_LIKELY(hasLongAdvances || state.size() > 1)) {
811                    summaryType = CarryData::ExplicitSummary;
812                    state.insert(state.begin(), mCarryPackType);
813                } else {
814                    summaryType = CarryData::ImplicitSummary;
815                    if (state[0]->isStructTy()) {
816                        summaryType = CarryData::BorrowedSummary;
817                    }
818                }
819            }
820        }
821        carryState = StructType::get(iBuilder->getContext(), state);
822        // If we're in a loop and cannot use collapsing carry mode, convert the carry state struct into a capacity,
823        // carry state pointer, and summary pointer struct.
824        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
825            carryState = StructType::get(iBuilder->getSizeTy(), carryState->getPointerTo(), mCarryPackType->getPointerTo(), nullptr);
826        }
827    }
828    cd.setSummaryType(summaryType);
829
830    return carryState;
831}
832
833/** ------------------------------------------------------------------------------------------------------------- *
834 * @brief constructor
835 ** ------------------------------------------------------------------------------------------------------------- */
836CarryManager::CarryManager(IDISA::IDISA_Builder * idb) noexcept
837: iBuilder(idb)
838, mKernel(nullptr)
839, mSelf(nullptr)
840, mBitBlockType(idb->getBitBlockType())
841, mBitBlockWidth(idb->getBitBlockWidth())
842, mCurrentFrame(nullptr)
843, mCurrentFrameIndex(0)
844, mCurrentScope(nullptr)
845, mCarryInfo(nullptr)
846, mCarryPackType(mBitBlockType)
847, mCarryPackPtr(nullptr)
848, mIfDepth(0)
849, mHasLongAdvance(false)
850, mHasLoop(false)
851, mLoopDepth(0)
852, mLoopSelector(nullptr)
853, mCarryScopes(0) {
854
855}
856
857}
Note: See TracBrowser for help on using the repository browser.