source: icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp @ 4792

Last change on this file since 4792 was 4792, checked in by cameron, 4 years ago

Fix segfaults at page boundary

File size: 33.9 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7
8#include <include/simd-lib/bitblock.hpp>
9#include <stdexcept>
10#include <pablo/carry_data.h>
11#include <pablo/codegenstate.h>
12#include <pablo/carry_manager.h>
13#include <pablo/pabloAST.h>
14#include <iostream>
15#include <llvm/Support/CommandLine.h>
16#include <llvm/IR/BasicBlock.h>
17#include <llvm/IR/CallingConv.h>
18#include <llvm/IR/Function.h>
19
20
21static cl::opt<CarryManagerStrategy> Strategy(cl::desc("Choose carry management strategy:"),
22                                              cl::values(
23                                                         clEnumVal(BitBlockStrategy, "Unpacked, each carry in a separate bitblock."),
24                                                         clEnumVal(SequentialFullyPackedStrategy, "Sequential packing, up to 64 carries per pack."),
25                                                         clEnumValEnd));
26
27
28namespace pablo {
29 
30    unsigned doScopeCount(PabloBlock * pb) {
31        unsigned count = 1;
32       
33        for (Statement * stmt : *pb) {
34            if (If * ifStatement = dyn_cast<If>(stmt)) {
35                count += doScopeCount(&ifStatement->getBody());
36            }
37            else if (While * whileStatement = dyn_cast<While>(stmt)) {
38                count += doScopeCount(&whileStatement->getBody());
39            }
40        }
41        return count;
42       
43    }
44
45void CarryManager::initialize(Module * m, PabloBlock * pb) {
46    mPabloRoot = pb;
47    unsigned scopeCount = doScopeCount(pb);
48    mCarryInfoVector.resize(scopeCount);
49    if (Strategy == SequentialFullyPackedStrategy) {
50        mPACK_SIZE = 64;
51        mITEMS_PER_PACK = 64;
52        mCarryPackType = mBuilder->getIntNTy(mPACK_SIZE);
53        mZeroInitializer = mBuilder->getInt64(0);
54        mOneInitializer = mBuilder->getInt64(-1);
55    }
56    else {
57        mPACK_SIZE = BLOCK_SIZE;
58        mITEMS_PER_PACK = 1;
59        mCarryPackType = mBitBlockType;
60    }
61    unsigned totalCarryDataSize = enumerate(pb, 0, 0);
62   
63    unsigned totalPackCount = (totalCarryDataSize + mITEMS_PER_PACK - 1)/mITEMS_PER_PACK;
64
65    mCarryPackPtr.resize(totalPackCount);
66    mCarryInPack.resize(totalPackCount);
67    mCarryOutPack.resize(totalPackCount);
68    for (unsigned i = 0; i < totalPackCount; i++) mCarryInPack[i]=nullptr;
69
70    if (Strategy == SequentialFullyPackedStrategy) {
71        mTotalCarryDataBitBlocks = (totalCarryDataSize + BLOCK_SIZE - 1)/BLOCK_SIZE;       
72    }
73    else {
74        mTotalCarryDataBitBlocks = totalCarryDataSize;
75    }
76   
77    ArrayType* cdArrayTy = ArrayType::get(mBitBlockType, mTotalCarryDataBitBlocks);
78    GlobalVariable* cdArray = new GlobalVariable(*m, cdArrayTy, /*isConstant=*/false, GlobalValue::CommonLinkage, /*Initializer=*/0, "process_block_carry_data");
79    cdArray->setAlignment(BLOCK_SIZE/8);
80    ConstantAggregateZero* cdInitData = ConstantAggregateZero::get(cdArrayTy);
81    cdArray->setInitializer(cdInitData);
82   
83    mCarryPackBasePtr = mBuilder->CreateBitCast(cdArray, PointerType::get(mCarryPackType, 0));
84    mCarryBitBlockPtr = mBuilder->CreateBitCast(cdArray, PointerType::get(mBitBlockType, 0));
85   
86    // Popcount data is stored after all the carry data.
87    if (mPabloCountCount > 0) {
88        ArrayType* pcArrayTy = ArrayType::get(mBuilder->getIntNTy(64), mPabloCountCount);
89        GlobalVariable* pcArray = new GlobalVariable(*m, pcArrayTy, /*isConstant=*/false, GlobalValue::CommonLinkage, 0, "popcount_data");
90        cdArray->setAlignment(BLOCK_SIZE/8);
91        ConstantAggregateZero* pcInitData = ConstantAggregateZero::get(pcArrayTy);
92        pcArray->setInitializer(pcInitData);
93        mPopcountBasePtr = mBuilder->CreateBitCast(pcArray, Type::getInt64PtrTy(mBuilder->getContext()));
94    }
95    // Carry Data area will have one extra bit block to store the block number.
96    GlobalVariable* blkNo = new GlobalVariable(*m, mBuilder->getIntNTy(64), /*isConstant=*/false, GlobalValue::CommonLinkage, 0, "blockNo");
97    blkNo->setAlignment(16);
98    blkNo->setInitializer(mBuilder->getInt64(0));
99    mBlockNoPtr = blkNo;
100    mBlockNo = mBuilder->CreateLoad(mBlockNoPtr);
101    /*  Set the current scope to PabloRoot */
102    mCurrentScope = mPabloRoot;
103    mCurrentFrameIndex = 0;
104    mCarryInfo = mCarryInfoVector[0];
105}
106   
107void CarryManager::generateBlockNoIncrement() {
108    mBuilder->CreateStore(mBuilder->CreateAdd(mBlockNo, mBuilder->getInt64(1)), mBlockNoPtr);
109}
110
111Value * CarryManager::getBlockNoPtr() {
112    return mBlockNoPtr;
113}
114
115
116unsigned CarryManager::enumerate(PabloBlock * blk, unsigned ifDepth, unsigned whileDepth) {
117    llvm::raw_os_ostream cerr(std::cerr);
118    unsigned idx = blk->getScopeIndex();
119    PabloBlockCarryData * cd = new PabloBlockCarryData(blk, mPACK_SIZE, mITEMS_PER_PACK);
120    mCarryInfoVector[idx] = cd;
121
122    cd->setIfDepth(ifDepth);
123    cd->setWhileDepth(whileDepth);
124    unsigned nestedOffset = cd->nested.frameOffset;
125 
126    for (Statement * stmt : *blk) {
127        if (Count * c = dyn_cast<Count>(stmt)) {
128            c->setGlobalCountIndex(mPabloCountCount);
129            mPabloCountCount++;
130        }
131        else if (If * ifStatement = dyn_cast<If>(stmt)) {
132            const unsigned ifCarryDataBits = enumerate(&ifStatement->getBody(), ifDepth+1, whileDepth);
133            PabloBlockCarryData * nestedBlockData = mCarryInfoVector[ifStatement->getBody().getScopeIndex()];
134            if (mITEMS_PER_PACK == mPACK_SIZE) {  // PACKING
135                if (cd->roomInFinalPack(nestedOffset) < ifCarryDataBits) {
136                    nestedOffset = alignCeiling(nestedOffset, mPACK_SIZE);
137                }
138            }
139            nestedBlockData->setFramePosition(nestedOffset);
140
141            nestedOffset += ifCarryDataBits;
142            if (cd->maxNestingDepth <= nestedBlockData->maxNestingDepth) cd->maxNestingDepth = nestedBlockData->maxNestingDepth + 1;
143            cd->nested.entries++;
144#ifdef CARRY_DEBUG
145            nestedBlockData->dumpCarryData(cerr);
146#endif
147        }
148        else if (While * whileStatement = dyn_cast<While>(stmt)) {
149            const unsigned whileCarryDataBits = enumerate(&whileStatement->getBody(), ifDepth, whileDepth+1);
150            PabloBlockCarryData * nestedBlockData = mCarryInfoVector[whileStatement->getBody().getScopeIndex()];
151            //if (whileStatement->isMultiCarry()) whileCarryDataBits *= whileStatement->getMaxIterations();
152            if (mITEMS_PER_PACK == mPACK_SIZE) {  // PACKING
153                if (cd->roomInFinalPack(nestedOffset) < whileCarryDataBits) {
154                    nestedOffset = alignCeiling(nestedOffset, mPACK_SIZE);
155                }
156            }
157            nestedBlockData->setFramePosition(nestedOffset);
158            nestedOffset += whileCarryDataBits;
159            if (cd->maxNestingDepth <= nestedBlockData->maxNestingDepth) cd->maxNestingDepth = nestedBlockData->maxNestingDepth + 1;
160            cd->nested.entries++;
161#ifdef CARRY_DEBUG
162            nestedBlockData->dumpCarryData(cerr);
163#endif
164        }
165    }
166   
167    cd->scopeCarryDataSize = nestedOffset;
168   
169    if (cd->explicitSummaryRequired()) {
170        // Need extra space for the summary variable, always the last
171        // entry within an if block.
172        if (mITEMS_PER_PACK == mPACK_SIZE) {  // PACKING
173            cd->scopeCarryDataSize = alignCeiling(cd->scopeCarryDataSize, mPACK_SIZE);
174        }
175        cd->summary.frameOffset = cd->scopeCarryDataSize;
176        cd->scopeCarryDataSize += mITEMS_PER_PACK;  //  computed summary is a full pack.
177    }
178    else {
179        cd->summary.frameOffset = 0;
180    }
181#ifdef CARRY_DEBUG
182    if (cd->ifDepth == 0) cd->dumpCarryData(cerr);
183#endif
184    return cd->scopeCarryDataSize;
185}
186
187
188/* Entering and leaving blocks. */
189
190void CarryManager::enterScope(PabloBlock * blk) {
191   
192    mCurrentScope = blk;
193    mCarryInfo = mCarryInfoVector[blk->getScopeIndex()];
194    mCurrentFrameIndex += mCarryInfo->getFrameIndex();
195    //std::cerr << "enterScope:  blk->getScopeIndex() = " << blk->getScopeIndex() << ", mCurrentFrameIndex = " << mCurrentFrameIndex << std::endl;
196}
197
198void CarryManager::leaveScope() {
199    mCurrentFrameIndex -= mCarryInfo->getFrameIndex();
200    if (mCurrentScope != mPabloRoot) {
201        mCurrentScope = mCurrentScope->getParent();
202        mCarryInfo = mCarryInfoVector[mCurrentScope->getScopeIndex()];
203    }
204    //std::cerr << "leaveScope:  mCurrentFrameIndex = " << mCurrentFrameIndex << std::endl;
205}
206
207
208/* Helper routines */
209
210unsigned CarryManager::absPosition(unsigned frameOffset, unsigned relPos) {
211    return mCurrentFrameIndex + frameOffset + relPos;
212}
213
214
215unsigned CarryManager::carryOpPosition(unsigned localIndex) {
216    //std::cerr << "carryOpPosition: addWithCarry.frameOffset = " << mCarryInfo->addWithCarry.frameOffset << ", localIndex = " <<localIndex << std::endl;
217    return absPosition(mCarryInfo->addWithCarry.frameOffset, localIndex);
218}
219
220unsigned CarryManager::advance1Position(unsigned localIndex) {
221    //std::cerr << "unsigned CarryManager::advance1Position: advance1.frameOffset = " << mCarryInfo->advance1.frameOffset << ", localIndex = " <<localIndex << std::endl;
222    return absPosition(mCarryInfo->advance1.frameOffset, localIndex);
223}
224
225unsigned CarryManager::shortAdvancePosition(unsigned localIndex) {
226    return absPosition(mCarryInfo->shortAdvance.frameOffset, localIndex);
227}
228
229unsigned CarryManager::longAdvanceBitBlockPosition(unsigned localIndex) {
230    return (mCurrentFrameIndex + mCarryInfo->longAdvance.frameOffset) / mITEMS_PER_PACK + localIndex;
231}
232   
233unsigned CarryManager::localBasePack() {
234    return (mCurrentFrameIndex + mCarryInfo->shortAdvance.frameOffset) / mITEMS_PER_PACK;
235}
236   
237unsigned CarryManager::scopeBasePack() {
238    return mCurrentFrameIndex / mITEMS_PER_PACK;
239}
240   
241
242
243unsigned CarryManager::summaryPosition() {
244    return absPosition(mCarryInfo->summary.frameOffset, 0);
245}
246
247
248unsigned CarryManager::summaryPackIndex() {
249    return summaryPosition()/mITEMS_PER_PACK;
250}
251
252unsigned CarryManager::summaryBits() {
253    if (mCarryInfo->scopeCarryDataSize > mITEMS_PER_PACK) return mPACK_SIZE;
254    else return mCarryInfo->scopeCarryDataSize;
255}
256
257
258
259Value * CarryManager::getCarryPack(unsigned packIndex) {
260    if (mCarryInPack[packIndex] == nullptr) {
261        Value * packPtr = mBuilder->CreateGEP(mCarryPackBasePtr, mBuilder->getInt64(packIndex));
262        // Save the computed pointer - so that it can be used in storeCarryPack.
263        mCarryPackPtr[packIndex] = packPtr;
264        mCarryInPack[packIndex] = mBuilder->CreateAlignedLoad(packPtr, mPACK_SIZE/8);
265    }
266    return mCarryInPack[packIndex];
267}
268
269void CarryManager::storeCarryPack(unsigned packIndex) {
270    mBuilder->CreateAlignedStore(mCarryOutPack[packIndex], mCarryPackPtr[packIndex], mPACK_SIZE/8);
271}
272
273   
274/* maskSelectBitRange selects the bits of a pack from lo_bit through
275   lo_bit + bitCount - 1, setting all other bits to zero.  */
276   
277Value * CarryManager::maskSelectBitRange(Value * pack, unsigned lo_bit, unsigned bitCount) {
278    if (bitCount == mPACK_SIZE) {
279        assert(lo_bit == 0);
280        return pack;
281    }
282    uint64_t mask = ((((uint64_t) 1) << bitCount) - 1) << lo_bit;
283    return mBuilder->CreateAnd(pack, mBuilder->getInt64(mask));
284}
285   
286Value * CarryManager::getCarryInBits(unsigned carryBitPos, unsigned carryBitCount) {
287    unsigned packIndex = carryBitPos / mPACK_SIZE;
288    unsigned packOffset = carryBitPos % mPACK_SIZE;
289    Value * selected = maskSelectBitRange(getCarryPack(packIndex), packOffset, carryBitCount);
290    if (packOffset == 0) return selected;
291    return mBuilder->CreateLShr(selected, packOffset);
292}
293
294void CarryManager::extractAndSaveCarryOutBits(Value * bitblock, unsigned carryBit_pos, unsigned carryBitCount) {
295    unsigned packIndex = carryBit_pos / mPACK_SIZE;
296    unsigned packOffset = carryBit_pos % mPACK_SIZE;
297    unsigned rshift = mPACK_SIZE - packOffset - carryBitCount;
298    uint64_t mask = ((((uint64_t) 1) << carryBitCount) - 1)  << packOffset;
299    //std::cerr << "extractAndSaveCarryOutBits: packIndex =" << packIndex << ", packOffset = " << packOffset << ", mask = " << mask << std::endl;
300    Value * field = iBuilder->mvmd_extract(mPACK_SIZE, bitblock, BLOCK_SIZE/mPACK_SIZE - 1);
301    //Value * field = maskSelectBitRange(field, PACK_SIZE - carryBitCount, carryBitCount);
302    if (rshift != 0) {
303        field = mBuilder->CreateLShr(field, mBuilder->getInt64(rshift));
304    }
305    if (packOffset != 0) {
306        field = mBuilder->CreateAnd(field, mBuilder->getInt64(mask));
307    }
308    if (mCarryOutPack[packIndex] == nullptr) {
309        mCarryOutPack[packIndex] = field;
310    }
311    else {
312        mCarryOutPack[packIndex] = mBuilder->CreateOr(mCarryOutPack[packIndex], field);
313    }
314}
315
316Value * CarryManager::pack2bitblock(Value * pack) {
317    return mBuilder->CreateBitCast(mBuilder->CreateZExt(pack, mBuilder->getIntNTy(BLOCK_SIZE)), mBitBlockType);
318}
319   
320   
321// Use field size 32 for BLOCK_SIZE 256, so that signmasks are i8.
322#if (BLOCK_SIZE==256)
323//#define PARALLEL_LONG_ADD
324#define PARALLEL_LONG_ADD_DIGIT_SIZE 32
325#endif
326   
327/* Methods for getting and setting individual carry values. */
328   
329Value * CarryManager::getCarryOpCarryIn(int localIndex) {
330    unsigned posn = carryOpPosition(localIndex);
331    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
332        return pack2bitblock(getCarryInBits(posn, 1));
333    }
334    else {
335        return getCarryPack(posn);
336    }
337}
338
339   
340void CarryManager::setCarryOpCarryOut(unsigned localIndex, Value * carry_out_strm) {
341    unsigned posn = carryOpPosition(localIndex);
342#ifndef PARALLEL_LONG_ADD
343    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
344        extractAndSaveCarryOutBits(carry_out_strm, posn, 1);
345    }
346    else {
347        Value * carry_bit = mBuilder->CreateLShr(mBuilder->CreateBitCast(carry_out_strm, mBuilder->getIntNTy(BLOCK_SIZE)), BLOCK_SIZE-1);
348        mCarryOutPack[posn] = mBuilder->CreateBitCast(carry_bit, mBitBlockType);
349        if (mCarryInfo->getWhileDepth() == 0) {
350            storeCarryPack(posn);
351        }
352    }
353#else
354    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
355        // Carry is at low bit position
356        unsigned packIndex = posn / mPACK_SIZE;
357        unsigned packOffset = posn % mPACK_SIZE;
358        Value * field = mBuilder->CreateZExt(carry_out_strm, mBuilder->getIntNTy(mPACK_SIZE));
359        if (packOffset != 0) {
360            field = mBuilder->CreateShl(field, mBuilder->getInt64(packOffset));
361        }
362        if (mCarryOutPack[packIndex] == nullptr) {
363            mCarryOutPack[packIndex] = field;
364        }
365        else {
366            mCarryOutPack[packIndex] = mBuilder->CreateOr(mCarryOutPack[packIndex], field);
367        }       
368    }
369    else {
370        Value * carry_bit = mBuilder->CreateZExt(carry_out_strm, mBuilder->getIntNTy(BLOCK_SIZE));
371        mCarryOutPack[posn] = mBuilder->CreateBitCast(carry_bit, mBitBlockType);
372        if (mCarryInfo->getWhileDepth() == 0) {
373            storeCarryPack(posn);
374        }
375    }
376
377   
378#endif
379}
380
381   
382   
383Value* CarryManager::genShiftLeft64(Value* e) {
384    Value* i128_val = mBuilder->CreateBitCast(e, mBuilder->getIntNTy(BLOCK_SIZE));
385    return mBuilder->CreateBitCast(mBuilder->CreateShl(i128_val, 64), mBitBlockType);
386}
387
388Value* MatchStar(IRBuilder<> * b, Value * m, Value * c) {
389    return b->CreateOr(b->CreateXor(b->CreateAdd(b->CreateAnd(m, c), c), c), m);
390}
391       
392Value * CarryManager::addCarryInCarryOut(int localIndex, Value* e1, Value* e2) {
393#if (BLOCK_SIZE == 128)
394    Value * carryq_value = getCarryOpCarryIn(localIndex);
395    //calculate carry through logical ops
396    Value* carrygen = mBuilder->CreateAnd(e1, e2, "carrygen");
397    Value* carryprop = mBuilder->CreateOr(e1, e2, "carryprop");
398    Value* digitsum = mBuilder->CreateAdd(e1, e2, "digitsum");
399    Value* partial = mBuilder->CreateAdd(digitsum, carryq_value, "partial");
400    Value* digitcarry = mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, mBuilder->CreateNot(partial)));
401    Value* mid_carry_in = genShiftLeft64(mBuilder->CreateLShr(digitcarry, 63));
402    Value* sum = mBuilder->CreateAdd(partial, mBuilder->CreateBitCast(mid_carry_in, mBitBlockType), "sum");
403    Value* carry_out_strm = mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, mBuilder->CreateNot(sum)));
404    setCarryOpCarryOut(localIndex, carry_out_strm);
405    return sum;
406#elif (defined(PARALLEL_LONG_ADD))
407    //BLOCK_SIZE == 256, there is no other implementation
408    Type * longAddVectorType = VectorType::get(mBuilder->getIntNTy(PARALLEL_LONG_ADD_DIGIT_SIZE), BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE);
409    Type * longAddBitMaskIntegerType = mBuilder->getIntNTy(BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE);
410    Type * longAddBitMaskVectorType = VectorType::get(mBuilder->getIntNTy(1), BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE);
411    // double the mask size to allow room for carry-out.
412    Type * longAddBitMaskManipulationType = mBuilder->getIntNTy(2 * BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE);
413    Value * all_ones = Constant::getAllOnesValue(longAddVectorType);
414    Value * carryin = iBuilder->mvmd_extract(2 * BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE, getCarryOpCarryIn(localIndex), 0);
415    Value * carrygen = mBuilder->CreateAnd(e1, e2, "carrygen");
416    Value * carryprop = mBuilder->CreateOr(e1, e2, "carryprop");
417    // Sum individual digits.
418    Value * digitsum = iBuilder->simd_add(PARALLEL_LONG_ADD_DIGIT_SIZE, e1, e2);
419    Value * digitcarry = mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, mBuilder->CreateNot(digitsum)));
420    Value * carry_mask = mBuilder->CreateZExt(iBuilder->hsimd_signmask(PARALLEL_LONG_ADD_DIGIT_SIZE, digitcarry), longAddBitMaskManipulationType);
421    Value * bubble_fields = iBuilder->simd_eq(PARALLEL_LONG_ADD_DIGIT_SIZE, digitsum, all_ones);
422    Value * bubble_mask = mBuilder->CreateZExt(iBuilder->hsimd_signmask(PARALLEL_LONG_ADD_DIGIT_SIZE, bubble_fields), longAddBitMaskManipulationType);
423    Value * carry_markers = mBuilder->CreateAdd(mBuilder->CreateAdd(carry_mask, carry_mask), carryin); 
424    Value * increments = MatchStar(mBuilder, carry_markers, bubble_mask);
425    Value * carry_out = mBuilder->CreateLShr(increments, BLOCK_SIZE/PARALLEL_LONG_ADD_DIGIT_SIZE);
426    Value * spread = mBuilder->CreateZExt(mBuilder->CreateBitCast(mBuilder->CreateTrunc(increments, longAddBitMaskIntegerType), longAddBitMaskVectorType), longAddVectorType);
427    Value* sum = iBuilder->simd_add(PARALLEL_LONG_ADD_DIGIT_SIZE, digitsum, spread);
428    setCarryOpCarryOut(localIndex, carry_out);
429    return sum;
430#else
431    //BLOCK_SIZE == 256, default implementation
432    Value * carryq_value = getCarryOpCarryIn(localIndex);
433    Value* carrygen = mBuilder->CreateAnd(e1, e2, "carrygen");
434    Value* carryprop = mBuilder->CreateOr(e1, e2, "carryprop");
435    Value * sum = iBuilder->simd_add(BLOCK_SIZE, iBuilder->simd_add(BLOCK_SIZE, e1, e2), carryq_value);
436    Value* carry_out_strm = mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, mBuilder->CreateNot(sum)));
437    setCarryOpCarryOut(localIndex, carry_out_strm);
438    return sum;
439#endif         
440}
441
442
443Value * CarryManager::advanceCarryInCarryOut(int localIndex, unsigned shift_amount, Value * strm) {
444    if (shift_amount == 1) {
445        return unitAdvanceCarryInCarryOut(localIndex, strm);
446    }
447    else if (shift_amount < LongAdvanceBase) {
448        return shortAdvanceCarryInCarryOut(localIndex, shift_amount, strm);
449    }
450    else {
451        return longAdvanceCarryInCarryOut(localIndex, shift_amount, strm);
452    }
453}
454
455Value * CarryManager::unitAdvanceCarryInCarryOut(int localIndex, Value * strm) {
456    unsigned posn = advance1Position(localIndex);
457    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
458        extractAndSaveCarryOutBits(strm, posn, 1);
459        Value* carry_longint = mBuilder->CreateZExt(getCarryInBits(posn, 1), mBuilder->getIntNTy(BLOCK_SIZE));
460        Value* strm_longint = mBuilder->CreateBitCast(strm, mBuilder->getIntNTy(BLOCK_SIZE));
461        Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, 1), carry_longint);
462        Value* result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
463        return result_value;
464    }
465    mCarryOutPack[posn] = strm;
466    Value * carry_in = getCarryPack(posn);
467    if (mCarryInfo->getWhileDepth() == 0) {
468        storeCarryPack(posn);
469    }
470    Value* result_value;
471   
472#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
473    Value * ahead64 = iBuilder->mvmd_dslli(64, carry_in, strm, 1);
474    result_value = mBuilder->CreateOr(iBuilder->simd_srli(64, ahead64, 63), iBuilder->simd_slli(64, strm, 1));
475#else
476    Value* advanceq_longint = mBuilder->CreateBitCast(carry_in, mBuilder->getIntNTy(BLOCK_SIZE));
477    Value* strm_longint = mBuilder->CreateBitCast(strm, mBuilder->getIntNTy(BLOCK_SIZE));
478    Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, 1), mBuilder->CreateLShr(advanceq_longint, BLOCK_SIZE - 1), "advance");
479    result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
480   
481#endif
482    return result_value;
483}
484
485Value * CarryManager::shortAdvanceCarryInCarryOut(int localIndex, unsigned shift_amount, Value * strm) {
486    unsigned posn = shortAdvancePosition(localIndex);
487    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
488        extractAndSaveCarryOutBits(strm, posn, shift_amount);
489        //std::cerr << "shortAdvanceCarryInCarryOut: posn = " << posn << ", shift_amount = " << shift_amount << std::endl;
490        Value* carry_longint = mBuilder->CreateZExt(getCarryInBits(posn, shift_amount), mBuilder->getIntNTy(BLOCK_SIZE));
491        Value* strm_longint = mBuilder->CreateBitCast(strm, mBuilder->getIntNTy(BLOCK_SIZE));
492        Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, shift_amount), carry_longint);
493        Value* result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
494        return result_value;
495    }
496    mCarryOutPack[posn] = strm;
497    Value * carry_in = getCarryPack(posn);
498    if (mCarryInfo->getWhileDepth() == 0) {
499        storeCarryPack(posn);
500    }
501    Value* advanceq_longint = mBuilder->CreateBitCast(carry_in, mBuilder->getIntNTy(BLOCK_SIZE));
502    Value* strm_longint = mBuilder->CreateBitCast(strm, mBuilder->getIntNTy(BLOCK_SIZE));
503    Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, shift_amount), mBuilder->CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
504    return mBuilder->CreateBitCast(adv_longint, mBitBlockType);
505}
506   
507
508/*  currently defined in carry_data.h
509 
510 static unsigned power2ceil (unsigned v) {
511 unsigned ceil = 1;
512 while (ceil < v) ceil *= 2;
513 return ceil;
514 }
515 
516 unsigned longAdvanceEntries(unsigned shift_amount) const {
517 return (shift_amount + BLOCK_SIZE - 1)/BLOCK_SIZE;
518 }
519 
520 unsigned longAdvanceBufferSize(unsigned shift_amount)  const {
521 return power2ceil(longAdvanceEntries(shift_amount));
522 }
523 */
524   
525Value * CarryManager::longAdvanceCarryInCarryOut(int localIndex, unsigned shift_amount, Value * carry_out) {
526    unsigned carryDataIndex = longAdvanceBitBlockPosition(localIndex);
527    Value * advBaseIndex = mBuilder->getInt64(carryDataIndex);
528    if (shift_amount <= BLOCK_SIZE) {
529        // special case using a single buffer entry and the carry_out value.
530        Value * advanceDataPtr = mBuilder->CreateGEP(mCarryBitBlockPtr, advBaseIndex);
531        Value * carry_block0 = mBuilder->CreateAlignedLoad(advanceDataPtr, BLOCK_SIZE/8);
532        mBuilder->CreateAlignedStore(carry_out, advanceDataPtr, BLOCK_SIZE/8);
533        /* Very special case - no combine */
534        if (shift_amount == BLOCK_SIZE) return carry_block0;
535        Value* block0_shr = mBuilder->CreateLShr(mBuilder->CreateBitCast(carry_block0, mBuilder->getIntNTy(BLOCK_SIZE)), BLOCK_SIZE - shift_amount);
536        Value* block1_shl = mBuilder->CreateShl(mBuilder->CreateBitCast(carry_out, mBuilder->getIntNTy(BLOCK_SIZE)), shift_amount);
537        return mBuilder->CreateBitCast(mBuilder->CreateOr(block1_shl, block0_shr), mBitBlockType);
538    }
539    // We need a buffer of at least two elements for storing the advance data.
540    const unsigned block_shift = shift_amount % BLOCK_SIZE;
541    const unsigned advanceEntries = mCarryInfo->longAdvanceEntries(shift_amount);
542    const unsigned bufsize = mCarryInfo->longAdvanceBufferSize(shift_amount);
543    Value * indexMask = mBuilder->getInt64(bufsize - 1);  // A mask to implement circular buffer indexing
544    Value * loadIndex0 = mBuilder->CreateAdd(mBuilder->CreateAnd(mBuilder->CreateSub(mBlockNo, mBuilder->getInt64(advanceEntries)), indexMask), advBaseIndex);
545    Value * storeIndex = mBuilder->CreateAdd(mBuilder->CreateAnd(mBlockNo, indexMask), advBaseIndex);
546    Value * carry_block0 = mBuilder->CreateAlignedLoad(mBuilder->CreateGEP(mCarryBitBlockPtr, loadIndex0), BLOCK_SIZE/8);
547    // If the long advance is an exact multiple of BLOCK_SIZE, we simply return the oldest
548    // block in the long advance carry data area. 
549    if (block_shift == 0) {
550        mBuilder->CreateAlignedStore(carry_out, mBuilder->CreateGEP(mCarryBitBlockPtr, storeIndex), BLOCK_SIZE/8);
551        return carry_block0;
552    }
553    // Otherwise we need to combine data from the two oldest blocks.
554    Value * loadIndex1 = mBuilder->CreateAdd(mBuilder->CreateAnd(mBuilder->CreateSub(mBlockNo, mBuilder->getInt64(advanceEntries-1)), indexMask), advBaseIndex);
555    Value * carry_block1 = mBuilder->CreateAlignedLoad(mBuilder->CreateGEP(mCarryBitBlockPtr, loadIndex1), BLOCK_SIZE/8);
556    Value* block0_shr = mBuilder->CreateLShr(mBuilder->CreateBitCast(carry_block0, mBuilder->getIntNTy(BLOCK_SIZE)), BLOCK_SIZE - block_shift);
557    Value* block1_shl = mBuilder->CreateShl(mBuilder->CreateBitCast(carry_block1, mBuilder->getIntNTy(BLOCK_SIZE)), block_shift);
558    mBuilder->CreateAlignedStore(carry_out, mBuilder->CreateGEP(mCarryBitBlockPtr, storeIndex), BLOCK_SIZE/8);
559    return mBuilder->CreateBitCast(mBuilder->CreateOr(block1_shl, block0_shr), mBitBlockType);
560}
561   
562
563/* Methods for getting and setting carry summary values */
564   
565bool CarryManager::blockHasCarries(){
566    return mCarryInfo->blockHasCarries();
567} 
568
569
570Value * CarryManager::getCarrySummaryExpr() {
571    unsigned summary_index = summaryPackIndex();
572    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
573        Value * pack = getCarryPack(summary_index);
574        Value * summary_bits = maskSelectBitRange(pack, summaryPosition() % mPACK_SIZE, summaryBits());
575        return mBuilder->CreateBitCast(mBuilder->CreateZExt(summary_bits, mBuilder->getIntNTy(BLOCK_SIZE)), mBitBlockType);
576    }
577    else {
578        return getCarryPack(summary_index);
579    }
580}
581
582void CarryManager::initializeCarryDataAtIfEntry() {
583    if (blockHasCarries()) {
584        if (mCarryOutPack[scopeBasePack()] == nullptr) {
585            mCarryInfo->ifEntryPack = mZeroInitializer;
586        }
587        else {
588            mCarryInfo->ifEntryPack = mCarryOutPack[scopeBasePack()];
589        }
590    }
591}
592   
593void CarryManager::buildCarryDataPhisAfterIfBody(BasicBlock * ifEntryBlock, BasicBlock * ifBodyFinalBlock) {
594    unsigned const ifScopeCarrySize = mCarryInfo->scopeCarryDataSize;
595    if (ifScopeCarrySize == 0) {
596        // No carry data, therefore no phi nodes.
597        return;
598    }
599    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
600        if (ifScopeCarrySize <= mPACK_SIZE) {
601            unsigned const ifPackIndex = scopeBasePack();
602            PHINode * ifPack_phi = mBuilder->CreatePHI(mCarryPackType, 2, "ifPack");
603            ifPack_phi->addIncoming(mCarryInfo->ifEntryPack, ifEntryBlock);
604            ifPack_phi->addIncoming(mCarryOutPack[ifPackIndex], ifBodyFinalBlock);
605            mCarryOutPack[ifPackIndex] = ifPack_phi;
606            return;
607        }
608    }
609    if (mCarryInfo->getIfDepth() > 1) {
610        const unsigned summaryIndex = summaryPackIndex();
611        PHINode * summary_phi = mBuilder->CreatePHI(mCarryPackType, 2, "summary");
612        summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
613        summary_phi->addIncoming(mCarryOutPack[summaryIndex], ifBodyFinalBlock);
614        mCarryOutPack[summaryIndex] = summary_phi;
615    }
616}
617   
618void CarryManager::addSummaryPhiIfNeeded(BasicBlock * ifEntryBlock, BasicBlock * ifBodyFinalBlock) {
619    if ((mCarryInfo->getIfDepth() <= 1) || !mCarryInfo->blockHasCarries()){
620        // For ifDepth == 1, the parent does not need a summary as it is not itself within an if.
621        // Therefore, it doesn't need access to this block's summary in building its own.
622        return;
623    }
624    const unsigned carrySummaryIndex = summaryPackIndex();
625    PHINode * summary_phi = mBuilder->CreatePHI(mCarryPackType, 2, "summary");
626    summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
627    summary_phi->addIncoming(mCarryOutPack[carrySummaryIndex], ifBodyFinalBlock);
628    mCarryOutPack[carrySummaryIndex] = summary_phi;
629}
630   
631void CarryManager::generateCarryOutSummaryCodeIfNeeded() {
632   
633    if (!mCarryInfo->explicitSummaryRequired()) {
634        // An explicit summary may not be required, if there is a single carry
635        // operation within the block, or the carries are packed and all carry
636        // bits fit within a single pack.
637        return;
638    }
639   
640    const unsigned carrySummaryIndex = summaryPackIndex();
641   
642    Value * carry_summary = mZeroInitializer;
643    if (mCarryInfo->blockHasLongAdvances()) { // Force if entry
644        carry_summary = mOneInitializer;
645    }
646    else {
647        unsigned localCarryIndex = localBasePack();
648        unsigned localCarryPacks = mCarryInfo->getLocalCarryPackCount();
649        if (localCarryPacks > 0) {
650            carry_summary = mCarryOutPack[localCarryIndex];
651            for (unsigned i = 1; i < localCarryPacks; i++) {
652                carry_summary = mBuilder->CreateOr(carry_summary, mCarryOutPack[localCarryIndex+i]);
653            }
654        }
655        for (Statement * stmt : *mCurrentScope) {
656            if (If * innerIf = dyn_cast<If>(stmt)) {
657                PabloBlock * inner_blk = & innerIf->getBody();
658                enterScope(inner_blk);
659                if (blockHasCarries()) {
660                  carry_summary = mBuilder->CreateOr(carry_summary, mCarryOutPack[summaryPackIndex()]);
661                }
662                leaveScope();
663            }
664            else if (While * innerWhile = dyn_cast<While>(stmt)) {
665                PabloBlock * inner_blk = & innerWhile->getBody();
666                enterScope(inner_blk);
667                if (blockHasCarries()) {
668                    carry_summary = mBuilder->CreateOr(carry_summary, mCarryOutPack[summaryPackIndex()]);
669                }
670                leaveScope();
671            }
672        }
673    }
674    // Calculation of the carry out summary is complete.   Store it and make it
675    // available in case it must included by parent blocks.
676    mCarryOutPack[carrySummaryIndex] = carry_summary;
677    storeCarryPack(carrySummaryIndex);
678}
679
680void CarryManager::ensureCarriesLoadedRecursive() {
681    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
682    const unsigned currentScopeBase = scopeBasePack();
683    if (mCarryInfo->getWhileDepth() == 1) {
684        for (auto i = currentScopeBase; i < currentScopeBase + scopeCarryPacks; ++i) {
685            getCarryPack(i);
686        }
687    }
688}
689
690
691void CarryManager::initializeCarryDataPhisAtWhileEntry(BasicBlock * whileEntryBlock) {
692    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
693    mCarryOutAccumPhis.resize(scopeCarryPacks);
694#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
695    const unsigned currentScopeBase = scopeBasePack();
696    mCarryInPhis.resize(scopeCarryPacks);
697#endif
698    for (unsigned index = 0; index < scopeCarryPacks; ++index) {
699#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
700        PHINode * phi_in = mBuilder->CreatePHI(mCarryPackType, 2);
701        phi_in->addIncoming(mCarryInPack[currentScopeBase+index], whileEntryBlock);
702        mCarryInPhis[index] = phi_in;
703#endif
704        PHINode * phi_out = mBuilder->CreatePHI(mCarryPackType, 2);
705        phi_out->addIncoming(mZeroInitializer, whileEntryBlock);
706        mCarryOutAccumPhis[index] = phi_out;
707    }
708}
709
710
711void CarryManager::extendCarryDataPhisAtWhileBodyFinalBlock(BasicBlock * whileBodyFinalBlock) {
712    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
713    const unsigned currentScopeBase = scopeBasePack();
714    for (unsigned index = 0; index < scopeCarryPacks; ++index) {
715#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
716        mCarryInPhis[index]->addIncoming(mZeroInitializer, whileBodyFinalBlock);
717#endif
718        PHINode * phi = mCarryOutAccumPhis[index];
719        Value * carryOut = mBuilder->CreateOr(phi, mCarryOutPack[currentScopeBase+index]);
720        phi->addIncoming(carryOut, whileBodyFinalBlock);
721        mCarryOutPack[currentScopeBase+index] = carryOut;
722    }
723}
724
725void CarryManager::ensureCarriesStoredRecursive() {
726    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
727    const unsigned currentScopeBase = scopeBasePack();
728    if (mCarryInfo->getWhileDepth() == 1) {
729        for (auto i = currentScopeBase; i < currentScopeBase + scopeCarryPacks; ++i) {
730            storeCarryPack(i);
731        }
732    }
733}
734
735/* Store all the full carry packs generated locally in this scope or the
736   single full pack for this scope*/
737void CarryManager::ensureCarriesStoredLocal() {
738    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
739        const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
740        if ((scopeCarryPacks > 0) && ((mCurrentFrameIndex % mPACK_SIZE) == 0)) {
741            // We have carry data and we are not in the middle of a pack.
742            // Write out all local packs.
743            auto localCarryIndex = localBasePack();
744            auto localCarryPacks = mCarryInfo->getLocalCarryPackCount();
745            for (auto i = localCarryIndex; i < localCarryIndex + localCarryPacks; i++) {
746                storeCarryPack(i);
747            }
748            if ((localCarryPacks == 0) && (scopeCarryPacks == 1) && (mCarryInfo->nested.entries > 1)) {
749                storeCarryPack(localCarryIndex);
750            }
751        }
752    }
753}
754
755Value * CarryManager::popCount(Value * to_count, unsigned globalIdx) {
756    Value * countPtr = mBuilder->CreateGEP(mPopcountBasePtr, mBuilder->getInt64(globalIdx));
757    Value * countSoFar = mBuilder->CreateAlignedLoad(countPtr, 8);
758    Value * fieldCounts = iBuilder->simd_popcount(64, to_count);
759    for (int i = 0; i < BLOCK_SIZE/64; i++) {
760        countSoFar = mBuilder->CreateAdd(countSoFar, iBuilder->mvmd_extract(64, fieldCounts, i));
761    }
762    mBuilder->CreateAlignedStore(countSoFar, countPtr, 8);
763    return mBuilder->CreateBitCast(mBuilder->CreateZExt(countSoFar, mBuilder->getIntNTy(BLOCK_SIZE)), mBitBlockType);
764}
765
766CarryManager::~CarryManager() {
767    for (auto * cd : mCarryInfoVector) {
768        delete cd;
769    }
770}
771
772}
773
Note: See TracBrowser for help on using the repository browser.