source: icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp @ 4655

Last change on this file since 4655 was 4655, checked in by cameron, 4 years ago

Simplify of unit advance with IDISA dslli improves performance

File size: 16.6 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7
8#include <include/simd-lib/bitblock.hpp>
9#include <stdexcept>
10#include <carry_data.h>
11#include <codegenstate.h>
12#include <carry_manager.h>
13#include <pabloAST.h>
14#include <iostream>
15
16namespace pablo {
17 
18
19unsigned CarryManager::initialize(PabloBlock * pb, Value * carryPtr) {
20 
21    mPabloRoot = pb;
22    mCarryDataPtr = carryPtr;
23    iBuilder = new IDISA::IDISA_Builder(mMod, mBuilder, mBitBlockType);
24   
25    PabloBlockCarryData & cd = pb->carryData;
26    mTotalCarryDataSize = cd.enumerate(*pb) + 1;   // One extra element for the block no.
27    mBlockNoPtr = mBuilder->CreateBitCast(mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(mTotalCarryDataSize - 1)), Type::getInt64PtrTy(mBuilder->getContext()));
28    mBlockNo = mBuilder->CreateLoad(mBlockNoPtr);
29    mCarryInVector.resize(mTotalCarryDataSize);
30    mCarryInPhis.resize(mTotalCarryDataSize);
31    mCarryOutAccumPhis.resize(mTotalCarryDataSize);
32    mCarryOutVector.resize(mTotalCarryDataSize);
33   
34    return mTotalCarryDataSize;
35}
36
37void CarryManager::generateBlockNoIncrement() {
38    mBuilder->CreateStore(mBuilder->CreateAdd(mBlockNo, mBuilder->getInt64(1)), mBlockNoPtr);
39}
40
41Value * CarryManager::getBlockNoPtr() {
42    return mBlockNoPtr;
43}
44
45
46    /* Methods for getting and setting individual carry values. */
47   
48Value * CarryManager::getCarryOpCarryIn(PabloBlock * blk, int localIndex) {
49    PabloBlockCarryData & cd = blk->carryData;
50    if (cd.getWhileDepth() == 0) {
51       Value * packPtr = mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(cd.carryOpCarryDataOffset(localIndex)));
52       mCarryInVector[cd.carryOpCarryDataOffset(localIndex)] = mBuilder->CreateAlignedLoad(packPtr, BLOCK_SIZE/8);
53    }
54    return mCarryInVector[cd.carryOpCarryDataOffset(localIndex)];
55}
56
57void CarryManager::setCarryOpCarryOut(PabloBlock * blk, unsigned idx, Value * carry_out) {
58    PabloBlockCarryData & cd = blk->carryData;
59    mCarryOutVector[cd.carryOpCarryDataOffset(idx)] = carry_out;
60    if (cd.getWhileDepth() == 0) {
61       Value * packPtr = mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(cd.carryOpCarryDataOffset(idx)));
62       mBuilder->CreateAlignedStore(carry_out, packPtr, BLOCK_SIZE/8);
63    }
64}
65
66   
67Value * CarryManager::advanceCarryInCarryOut(PabloBlock * blk, int localIndex, int shift_amount, Value * strm) {
68    if (shift_amount == 1) {
69        return unitAdvanceCarryInCarryOut(blk, localIndex, strm);
70    }
71    else if (shift_amount < LongAdvanceBase) {
72        return shortAdvanceCarryInCarryOut(blk, localIndex, shift_amount, strm);
73    }
74    else {
75        return longAdvanceCarryInCarryOut(blk, localIndex, shift_amount, strm);
76    }
77}
78
79Value * CarryManager::unitAdvanceCarryInCarryOut(PabloBlock * blk, int localIndex, Value * strm) {
80    PabloBlockCarryData & cd = blk->carryData;
81    unsigned carryDataIndex = cd.unitAdvanceCarryDataOffset(localIndex);
82    mCarryOutVector[carryDataIndex] = strm; 
83    if (cd.getWhileDepth() == 0) {
84        Value * packPtr = mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(carryDataIndex));
85        mCarryInVector[carryDataIndex] = mBuilder->CreateAlignedLoad(packPtr, BLOCK_SIZE/8);
86        mBuilder->CreateAlignedStore(strm, packPtr, BLOCK_SIZE/8);
87       
88    }
89    Value * carry_in = mCarryInVector[carryDataIndex];
90    Value* result_value;
91   
92#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
93    Value * ahead64 = iBuilder->mvmd_dslli(64, carry_in, strm, 1);
94    result_value = mBuilder->CreateOr(iBuilder->simd_srli(64, ahead64, 63), iBuilder->simd_slli(64, strm, 1));
95#else
96    Value* advanceq_longint = mBuilder->CreateBitCast(carry_in, mBuilder->getIntNTy(BLOCK_SIZE));
97    Value* strm_longint = mBuilder->CreateBitCast(strm, mBuilder->getIntNTy(BLOCK_SIZE));
98    Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, 1), mBuilder->CreateLShr(advanceq_longint, BLOCK_SIZE - 1), "advance");
99    result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
100   
101#endif
102    return result_value;
103}
104
105Value * CarryManager::shortAdvanceCarryInCarryOut(PabloBlock * blk, int localIndex, int shift_amount, Value * strm) {
106    PabloBlockCarryData & cd = blk->carryData;
107    unsigned carryDataIndex = cd.shortAdvanceCarryDataOffset(localIndex);
108    mCarryOutVector[carryDataIndex] = strm; 
109    if (cd.getWhileDepth() == 0) {
110        Value * packPtr = mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(carryDataIndex));
111        mCarryInVector[carryDataIndex] = mBuilder->CreateAlignedLoad(packPtr, BLOCK_SIZE/8);
112        mBuilder->CreateAlignedStore(strm, packPtr, BLOCK_SIZE/8);
113       
114    }
115    Value * carry_in = mCarryInVector[carryDataIndex];
116    Value* advanceq_longint = mBuilder->CreateBitCast(carry_in, mBuilder->getIntNTy(BLOCK_SIZE));
117    Value* strm_longint = mBuilder->CreateBitCast(strm, mBuilder->getIntNTy(BLOCK_SIZE));
118    Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, shift_amount), mBuilder->CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
119    return mBuilder->CreateBitCast(adv_longint, mBitBlockType);
120}
121   
122
123/*  currently defined in carry_data.h
124 
125 static unsigned power2ceil (unsigned v) {
126 unsigned ceil = 1;
127 while (ceil < v) ceil *= 2;
128 return ceil;
129 }
130 
131 unsigned longAdvanceEntries(unsigned shift_amount) const {
132 return (shift_amount + BLOCK_SIZE - 1)/BLOCK_SIZE;
133 }
134 
135 unsigned longAdvanceBufferSize(unsigned shift_amount)  const {
136 return power2ceil(longAdvanceEntries(shift_amount));
137 }
138 */
139
140   
141Value * CarryManager::longAdvanceCarryInCarryOut(PabloBlock * blk, int localIndex, int shift_amount, Value * carry_out) {
142    PabloBlockCarryData & cd = blk->carryData;
143    Value * advBaseIndex = mBuilder->getInt64(cd.longAdvanceCarryDataOffset(localIndex));
144    if (shift_amount <= BLOCK_SIZE) {
145        // special case using a single buffer entry and the carry_out value.
146        Value * advanceDataPtr = mBuilder->CreateGEP(mCarryDataPtr, advBaseIndex);
147        Value * carry_block0 = mBuilder->CreateAlignedLoad(advanceDataPtr, BLOCK_SIZE/8);
148        mBuilder->CreateAlignedStore(carry_out, advanceDataPtr, BLOCK_SIZE/8);
149        /* Very special case - no combine */
150        if (shift_amount == BLOCK_SIZE) return carry_block0;
151        Value* block0_shr = mBuilder->CreateLShr(mBuilder->CreateBitCast(carry_block0, mBuilder->getIntNTy(BLOCK_SIZE)), BLOCK_SIZE - shift_amount);
152        Value* block1_shl = mBuilder->CreateShl(mBuilder->CreateBitCast(carry_out, mBuilder->getIntNTy(BLOCK_SIZE)), shift_amount);
153        return mBuilder->CreateBitCast(mBuilder->CreateOr(block1_shl, block0_shr), mBitBlockType);
154    }
155    // We need a buffer of at least two elements for storing the advance data.
156    const unsigned block_shift = shift_amount % BLOCK_SIZE;
157    const unsigned advanceEntries = cd.longAdvanceEntries(shift_amount);
158    const unsigned bufsize = cd.longAdvanceBufferSize(shift_amount);
159    Value * indexMask = mBuilder->getInt64(bufsize - 1);  // A mask to implement circular buffer indexing
160    Value * loadIndex0 = mBuilder->CreateAdd(mBuilder->CreateAnd(mBuilder->CreateSub(mBlockNo, mBuilder->getInt64(advanceEntries)), indexMask), advBaseIndex);
161    Value * storeIndex = mBuilder->CreateAdd(mBuilder->CreateAnd(mBlockNo, indexMask), advBaseIndex);
162    Value * carry_block0 = mBuilder->CreateAlignedLoad(mBuilder->CreateGEP(mCarryDataPtr, loadIndex0), BLOCK_SIZE/8);
163    // If the long advance is an exact multiple of BLOCK_SIZE, we simply return the oldest
164    // block in the long advance carry data area. 
165    if (block_shift == 0) {
166        mBuilder->CreateAlignedStore(carry_out, mBuilder->CreateGEP(mCarryDataPtr, storeIndex), BLOCK_SIZE/8);
167        return carry_block0;
168    }
169    // Otherwise we need to combine data from the two oldest blocks.
170    Value * loadIndex1 = mBuilder->CreateAdd(mBuilder->CreateAnd(mBuilder->CreateSub(mBlockNo, mBuilder->getInt64(advanceEntries-1)), indexMask), advBaseIndex);
171    Value * carry_block1 = mBuilder->CreateAlignedLoad(mBuilder->CreateGEP(mCarryDataPtr, loadIndex1), BLOCK_SIZE/8);
172    Value* block0_shr = mBuilder->CreateLShr(mBuilder->CreateBitCast(carry_block0, mBuilder->getIntNTy(BLOCK_SIZE)), BLOCK_SIZE - block_shift);
173    Value* block1_shl = mBuilder->CreateShl(mBuilder->CreateBitCast(carry_block1, mBuilder->getIntNTy(BLOCK_SIZE)), block_shift);
174    mBuilder->CreateAlignedStore(carry_out, mBuilder->CreateGEP(mCarryDataPtr, storeIndex), BLOCK_SIZE/8);
175    return mBuilder->CreateBitCast(mBuilder->CreateOr(block1_shl, block0_shr), mBitBlockType);
176}
177   
178
179/* Methods for getting and setting carry summary values */
180   
181bool CarryManager::blockHasCarries(PabloBlock & blk){
182    PabloBlockCarryData & cd = blk.carryData;
183    return cd.getTotalCarryDataSize() > 0;
184} 
185
186Value * CarryManager::getCarrySummaryExpr(PabloBlock & blk) {
187    PabloBlockCarryData & cd = blk.carryData;
188    Value * packPtr = mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(cd.summaryCarryDataIndex()));
189    Value * summary_expr = mBuilder->CreateAlignedLoad(packPtr, BLOCK_SIZE/8);
190    // If the totalCarryDataSize is 1, then the carry summary is also the pack expr.
191    if (cd.getTotalCarryDataSize() == 1) {
192        mCarryInVector[cd.summaryCarryDataIndex()] = summary_expr;
193    }
194    return summary_expr;
195}
196
197bool CarryManager::summaryNeededInParentBlock(PabloBlock & blk){
198    PabloBlockCarryData & cd = blk.carryData;
199    return (cd.getIfDepth() > 0) && (cd.getTotalCarryDataSize() > 0);
200} 
201
202void CarryManager::addSummaryPhi(PabloBlock & blk, BasicBlock * ifEntryBlock, BasicBlock * ifBodyFinalBlock) {
203    const PabloBlockCarryData & cd = blk.carryData;
204    const unsigned carrySummaryIndex = cd.summaryCarryDataIndex();
205    PHINode * summary_phi = mBuilder->CreatePHI(mBitBlockType, 2, "summary");
206    summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
207    summary_phi->addIncoming(mCarryOutVector[carrySummaryIndex], ifBodyFinalBlock);
208    mCarryOutVector[carrySummaryIndex] = summary_phi;
209}
210
211void CarryManager::generateCarryOutSummaryCode(PabloBlock & blk) {
212   
213    const PabloBlockCarryData & cd = blk.carryData;
214    const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
215    const unsigned carrySummaryIndex = cd.summaryCarryDataIndex();
216   
217    if (cd.getTotalCarryDataSize() == 1) {
218        // If totalCarryDataSize == 1, then we have one pack which serves as
219        // the summary.   It should already be stored.   
220        return;
221    }
222   
223    Value * carry_summary = mZeroInitializer;
224   
225    if (cd.blockHasLongAdvances() > 0) { // Force if entry
226        carry_summary = mOneInitializer;
227    }
228    else {
229        auto localCarryPacks = cd.getLocalCarryDataSize();
230        if (localCarryPacks > 0) {
231            carry_summary = mCarryOutVector[baseCarryDataIdx];
232            for (auto i = 1; i < localCarryPacks; i++) {
233                //carry_summary = mBuilder->CreateOr(carry_summary, mPabloBlock->mCarryOutPack[i]);           
234                carry_summary = mBuilder->CreateOr(carry_summary, mCarryOutVector[baseCarryDataIdx+i]);
235            }
236        }
237        for (Statement * stmt : blk) {
238            if (If * innerIf = dyn_cast<If>(stmt)) {
239                PabloBlock & inner_blk = innerIf->getBody();
240                if (inner_blk.carryData.blockHasCarries()) {
241                  //carry_summary = mBuilder->CreateOr(carry_summary, inner_blk.mCarryOutSummary);
242                  carry_summary = mBuilder->CreateOr(carry_summary, mCarryOutVector[inner_blk.carryData.summaryCarryDataIndex()]);
243                }
244            }
245            else if (While * innerWhile = dyn_cast<While>(stmt)) {
246                PabloBlock & inner_blk = innerWhile->getBody();
247                if (inner_blk.carryData.blockHasCarries()) 
248                  //carry_summary = mBuilder->CreateOr(carry_summary, inner_blk.mCarryOutSummary);
249                  carry_summary = mBuilder->CreateOr(carry_summary, mCarryOutVector[inner_blk.carryData.summaryCarryDataIndex()]);
250            }
251        }
252    }
253    // Calculation of the carry out summary is complete.   Store it and make it
254    // available in case it must included by parent blocks.
255    mCarryOutVector[carrySummaryIndex] = carry_summary;
256    Value * packPtr = mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(carrySummaryIndex));
257    mBuilder->CreateAlignedStore(carry_summary, packPtr, BLOCK_SIZE/8);
258}
259
260void CarryManager::ensureCarriesLoadedLocal(PabloBlock & blk) {
261#if 0
262    const PabloBlockCarryData & cd = blk.carryData;
263    const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
264    const unsigned localCarryDataSize = cd.getLocalCarryDataSize();
265    const unsigned totalCarryDataSize = cd.getTotalCarryDataSize();
266    if (totalCarryDataSize == 0) return;
267    if ((cd.getIfDepth() > 0) && (totalCarryDataSize == 1)) return;
268    if (cd.getWhileDepth() > 0) return;
269    for (auto i = baseCarryDataIdx; i < baseCarryDataIdx + localCarryDataSize; ++i) {
270        mCarryInVector[i] = mBuilder->CreateAlignedLoad(mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(i)), BLOCK_SIZE/8, false);
271    }
272#endif
273}
274
275void CarryManager::ensureCarriesStoredLocal(PabloBlock & blk) {
276#if 0
277    const PabloBlockCarryData & cd = blk.carryData;
278    const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
279    const unsigned localCarryDataSize = cd.getLocalCarryDataSize();
280    const unsigned totalCarryDataSize = cd.getTotalCarryDataSize();
281    const unsigned carrySummaryIndex = cd.summaryCarryDataIndex();
282    if (totalCarryDataSize == 0) return;
283    if (cd.getWhileDepth() > 0) return;
284    for (auto i = baseCarryDataIdx; i < baseCarryDataIdx + localCarryDataSize; ++i) {
285        Value * storePtr = mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(i));
286        mBuilder->CreateAlignedStore(mCarryOutVector[i], storePtr, BLOCK_SIZE/8, false);
287    }
288    if (totalCarryDataSize > 1) {
289        Value * summaryPtr = mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(carrySummaryIndex));
290        mBuilder->CreateAlignedStore(mCarryOutVector[carrySummaryIndex], summaryPtr, BLOCK_SIZE/8, false);
291    }
292#endif
293}
294
295
296void CarryManager::ensureCarriesLoadedRecursive(PabloBlock & whileBlk) {
297    const PabloBlockCarryData & cd = whileBlk.carryData;
298    const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
299    const unsigned totalCarryDataSize = cd.getTotalCarryDataSize();
300    if (cd.getWhileDepth() == 1) {
301        for (auto i = baseCarryDataIdx; i < baseCarryDataIdx + totalCarryDataSize; ++i) {
302            mCarryInVector[i] = mBuilder->CreateAlignedLoad(mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(i)), BLOCK_SIZE/8, false);
303        }
304    }
305}
306
307
308void CarryManager::initializeCarryDataPhisAtWhileEntry(PabloBlock & whileBlk, BasicBlock * whileEntryBlock) {
309    const PabloBlockCarryData & cd = whileBlk.carryData;
310    const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
311    const unsigned totalCarryDataSize = cd.getTotalCarryDataSize();
312    for (unsigned index = baseCarryDataIdx; index < baseCarryDataIdx + totalCarryDataSize; ++index) {
313#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
314        PHINode * phi_in = mBuilder->CreatePHI(mBitBlockType, 2);
315        phi_in->addIncoming(mCarryInVector[index], whileEntryBlock);
316        mCarryInPhis[index] = phi_in;
317#endif
318        PHINode * phi_out = mBuilder->CreatePHI(mBitBlockType, 2);
319        phi_out->addIncoming(mZeroInitializer, whileEntryBlock);
320        mCarryOutAccumPhis[index] = phi_out;
321    }
322}
323
324
325void CarryManager::extendCarryDataPhisAtWhileBodyFinalBlock(PabloBlock & whileBlk, BasicBlock * whileBodyFinalBlock) {
326    const PabloBlockCarryData & cd = whileBlk.carryData;
327    const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
328    const unsigned totalCarryDataSize = cd.getTotalCarryDataSize();
329    for (unsigned index = baseCarryDataIdx; index < baseCarryDataIdx + totalCarryDataSize; ++index) {
330#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
331        mCarryInPhis[index]->addIncoming(mZeroInitializer, whileBodyFinalBlock);
332#endif
333        PHINode * phi = mCarryOutAccumPhis[index];
334        Value * carryOut = mBuilder->CreateOr(phi, mCarryOutVector[index]);
335        phi->addIncoming(carryOut, whileBodyFinalBlock);
336        mCarryOutVector[index] = carryOut;
337    }
338}
339
340void CarryManager::ensureCarriesStoredRecursive(PabloBlock & whileBlk) {
341    const PabloBlockCarryData & cd = whileBlk.carryData;
342    const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
343    const unsigned totalCarryDataSize = cd.getTotalCarryDataSize();
344    if (cd.getWhileDepth() == 1) {
345        for (auto i = baseCarryDataIdx; i < baseCarryDataIdx + totalCarryDataSize; ++i) {
346            Value * storePtr = mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(i));
347            mBuilder->CreateAlignedStore(mCarryOutVector[i], storePtr, BLOCK_SIZE/8, false);
348        }
349    }
350}
351
352}
353
Note: See TracBrowser for help on using the repository browser.