source: icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp @ 4881

Last change on this file since 4881 was 4881, checked in by cameron, 3 years ago

Parallel long addition within icgrep improves performance on AVX2

File size: 34.1 KB
RevLine 
[4644]1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7
8#include <include/simd-lib/bitblock.hpp>
9#include <stdexcept>
[4683]10#include <pablo/carry_data.h>
11#include <pablo/codegenstate.h>
12#include <pablo/carry_manager.h>
13#include <pablo/pabloAST.h>
[4647]14#include <iostream>
[4722]15#include <llvm/Support/CommandLine.h>
[4726]16#include <llvm/IR/BasicBlock.h>
17#include <llvm/IR/CallingConv.h>
18#include <llvm/IR/Function.h>
[4644]19
// Command-line option selecting the carry data layout used by CarryManager:
// either one bitblock per carry (BitBlockStrategy) or carries packed
// sequentially into packs (SequentialFullyPackedStrategy).
[4715]20static cl::opt<CarryManagerStrategy> Strategy(cl::desc("Choose carry management strategy:"),
21                                              cl::values(
22                                                         clEnumVal(BitBlockStrategy, "Unpacked, each carry in a separate bitblock."),
23                                                         clEnumVal(SequentialFullyPackedStrategy, "Sequential packing, up to 64 carries per pack."),
24                                                         clEnumValEnd));
25
26
[4644]27namespace pablo {
[4687]28 
29    unsigned doScopeCount(PabloBlock * pb) {
30        unsigned count = 1;
31       
32        for (Statement * stmt : *pb) {
33            if (If * ifStatement = dyn_cast<If>(stmt)) {
[4870]34                count += doScopeCount(ifStatement->getBody());
[4687]35            }
36            else if (While * whileStatement = dyn_cast<While>(stmt)) {
[4870]37                count += doScopeCount(whileStatement->getBody());
[4687]38            }
39        }
40        return count;
41       
42    }
[4824]43   
44void CarryManager::generateCarryDataInitializer(Module * m) {
45    FunctionType * functionType = FunctionType::get(Type::getVoidTy(m->getContext()), std::vector<Type *>({}), false);
46    SmallVector<AttributeSet, 1> Attrs;
47    Attrs.push_back(AttributeSet::get(m->getContext(), ~0U, std::vector<Attribute::AttrKind>({ Attribute::NoUnwind, Attribute::UWTable })));
48    AttributeSet AttrSet = AttributeSet::get(m->getContext(), Attrs);
49   
50    // Create the function that will be generated.
51    Function * f = Function::Create(functionType, GlobalValue::ExternalLinkage, "process_block_initialize_carries", m);
52    f->setCallingConv(CallingConv::C);
53    f->setAttributes(AttrSet);
54    llvm::IRBuilderBase::InsertPoint ip = mBuilder->saveIP();
[4826]55    mBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry1", f,0));
[4827]56    mBuilder->CreateMemSet(mCarryBitBlockPtr, mBuilder->getInt8(0), mTotalCarryDataBitBlocks * mBITBLOCK_WIDTH/8, 4);
[4824]57    ReturnInst::Create(m->getContext(), mBuilder->GetInsertBlock());
58    mBuilder->restoreIP(ip);
59}
60   
61   
[4644]62
[4726]63void CarryManager::initialize(Module * m, PabloBlock * pb) {
[4644]64    mPabloRoot = pb;
[4687]65    unsigned scopeCount = doScopeCount(pb);
66    mCarryInfoVector.resize(scopeCount);
[4729]67    if (Strategy == SequentialFullyPackedStrategy) {
68        mPACK_SIZE = 64;
69        mITEMS_PER_PACK = 64;
70        mCarryPackType = mBuilder->getIntNTy(mPACK_SIZE);
[4821]71        mPackBuilder = new IDISA::IDISA_Builder(mCarryPackType);
72        mPackBuilder->initialize(m, mBuilder);
[4729]73    }
74    else {
[4827]75        mPACK_SIZE = mBITBLOCK_WIDTH;
[4729]76        mITEMS_PER_PACK = 1;
77        mCarryPackType = mBitBlockType;
78    }
[4715]79    unsigned totalCarryDataSize = enumerate(pb, 0, 0);
[4687]80   
[4715]81    unsigned totalPackCount = (totalCarryDataSize + mITEMS_PER_PACK - 1)/mITEMS_PER_PACK;
82
[4703]83    mCarryPackPtr.resize(totalPackCount);
84    mCarryInPack.resize(totalPackCount);
85    mCarryOutPack.resize(totalPackCount);
[4750]86    for (unsigned i = 0; i < totalPackCount; i++) mCarryInPack[i]=nullptr;
[4715]87
88    if (Strategy == SequentialFullyPackedStrategy) {
[4827]89        mTotalCarryDataBitBlocks = (totalCarryDataSize + mBITBLOCK_WIDTH - 1)/mBITBLOCK_WIDTH;       
[4715]90    }
91    else {
92        mTotalCarryDataBitBlocks = totalCarryDataSize;
93    }
[4726]94   
95    ArrayType* cdArrayTy = ArrayType::get(mBitBlockType, mTotalCarryDataBitBlocks);
96    GlobalVariable* cdArray = new GlobalVariable(*m, cdArrayTy, /*isConstant=*/false, GlobalValue::CommonLinkage, /*Initializer=*/0, "process_block_carry_data");
[4827]97    cdArray->setAlignment(mBITBLOCK_WIDTH/8);
[4726]98    ConstantAggregateZero* cdInitData = ConstantAggregateZero::get(cdArrayTy);
99    cdArray->setInitializer(cdInitData);
100   
101    mCarryPackBasePtr = mBuilder->CreateBitCast(cdArray, PointerType::get(mCarryPackType, 0));
102    mCarryBitBlockPtr = mBuilder->CreateBitCast(cdArray, PointerType::get(mBitBlockType, 0));
103   
[4824]104    generateCarryDataInitializer(m);
105   
[4720]106    // Popcount data is stored after all the carry data.
107    if (mPabloCountCount > 0) {
[4726]108        ArrayType* pcArrayTy = ArrayType::get(mBuilder->getIntNTy(64), mPabloCountCount);
109        GlobalVariable* pcArray = new GlobalVariable(*m, pcArrayTy, /*isConstant=*/false, GlobalValue::CommonLinkage, 0, "popcount_data");
[4827]110        cdArray->setAlignment(mBITBLOCK_WIDTH/8);
[4726]111        ConstantAggregateZero* pcInitData = ConstantAggregateZero::get(pcArrayTy);
112        pcArray->setInitializer(pcInitData);
113        mPopcountBasePtr = mBuilder->CreateBitCast(pcArray, Type::getInt64PtrTy(mBuilder->getContext()));
[4720]114    }
[4715]115    // Carry Data area will have one extra bit block to store the block number.
[4726]116    GlobalVariable* blkNo = new GlobalVariable(*m, mBuilder->getIntNTy(64), /*isConstant=*/false, GlobalValue::CommonLinkage, 0, "blockNo");
[4733]117    blkNo->setAlignment(16);
[4726]118    blkNo->setInitializer(mBuilder->getInt64(0));
119    mBlockNoPtr = blkNo;
[4715]120    mBlockNo = mBuilder->CreateLoad(mBlockNoPtr);
[4690]121    /*  Set the current scope to PabloRoot */
122    mCurrentScope = mPabloRoot;
[4691]123    mCurrentFrameIndex = 0;
[4690]124    mCarryInfo = mCarryInfoVector[0];
[4644]125}
[4672]126   
[4644]127void CarryManager::generateBlockNoIncrement() {
128    mBuilder->CreateStore(mBuilder->CreateAdd(mBlockNo, mBuilder->getInt64(1)), mBlockNoPtr);
129}
130
// Returns mBlockNoPtr, which initialize() sets to the "blockNo" global variable.
131Value * CarryManager::getBlockNoPtr() {
132    return mBlockNoPtr;
133}
134
[4694]135
[4689]136unsigned CarryManager::enumerate(PabloBlock * blk, unsigned ifDepth, unsigned whileDepth) {
[4687]137    llvm::raw_os_ostream cerr(std::cerr);
138    unsigned idx = blk->getScopeIndex();
[4715]139    PabloBlockCarryData * cd = new PabloBlockCarryData(blk, mPACK_SIZE, mITEMS_PER_PACK);
[4687]140    mCarryInfoVector[idx] = cd;
141
142    cd->setIfDepth(ifDepth);
143    cd->setWhileDepth(whileDepth);
[4704]144    unsigned nestedOffset = cd->nested.frameOffset;
[4687]145 
146    for (Statement * stmt : *blk) {
[4720]147        if (Count * c = dyn_cast<Count>(stmt)) {
148            c->setGlobalCountIndex(mPabloCountCount);
149            mPabloCountCount++;
150        }
151        else if (If * ifStatement = dyn_cast<If>(stmt)) {
[4870]152            const unsigned ifCarryDataBits = enumerate(ifStatement->getBody(), ifDepth+1, whileDepth);
153            PabloBlockCarryData * nestedBlockData = mCarryInfoVector[ifStatement->getBody()->getScopeIndex()];
[4715]154            if (mITEMS_PER_PACK == mPACK_SIZE) {  // PACKING
[4714]155                if (cd->roomInFinalPack(nestedOffset) < ifCarryDataBits) {
[4715]156                    nestedOffset = alignCeiling(nestedOffset, mPACK_SIZE);
[4714]157                }
158            }
[4689]159            nestedBlockData->setFramePosition(nestedOffset);
160
[4687]161            nestedOffset += ifCarryDataBits;
162            if (cd->maxNestingDepth <= nestedBlockData->maxNestingDepth) cd->maxNestingDepth = nestedBlockData->maxNestingDepth + 1;
163            cd->nested.entries++;
[4732]164#ifdef CARRY_DEBUG
[4696]165            nestedBlockData->dumpCarryData(cerr);
166#endif
[4687]167        }
168        else if (While * whileStatement = dyn_cast<While>(stmt)) {
[4870]169            const unsigned whileCarryDataBits = enumerate(whileStatement->getBody(), ifDepth, whileDepth+1);
170            PabloBlockCarryData * nestedBlockData = mCarryInfoVector[whileStatement->getBody()->getScopeIndex()];
[4687]171            //if (whileStatement->isMultiCarry()) whileCarryDataBits *= whileStatement->getMaxIterations();
[4715]172            if (mITEMS_PER_PACK == mPACK_SIZE) {  // PACKING
[4714]173                if (cd->roomInFinalPack(nestedOffset) < whileCarryDataBits) {
[4715]174                    nestedOffset = alignCeiling(nestedOffset, mPACK_SIZE);
[4714]175                }
176            }
[4689]177            nestedBlockData->setFramePosition(nestedOffset);
[4687]178            nestedOffset += whileCarryDataBits;
179            if (cd->maxNestingDepth <= nestedBlockData->maxNestingDepth) cd->maxNestingDepth = nestedBlockData->maxNestingDepth + 1;
180            cd->nested.entries++;
[4732]181#ifdef CARRY_DEBUG
[4696]182            nestedBlockData->dumpCarryData(cerr);
183#endif
[4687]184        }
185    }
186   
[4704]187    cd->scopeCarryDataSize = nestedOffset;
[4687]188   
189    if (cd->explicitSummaryRequired()) {
190        // Need extra space for the summary variable, always the last
191        // entry within an if block.
[4715]192        if (mITEMS_PER_PACK == mPACK_SIZE) {  // PACKING
193            cd->scopeCarryDataSize = alignCeiling(cd->scopeCarryDataSize, mPACK_SIZE);
[4714]194        }
[4704]195        cd->summary.frameOffset = cd->scopeCarryDataSize;
[4715]196        cd->scopeCarryDataSize += mITEMS_PER_PACK;  //  computed summary is a full pack.
[4687]197    }
198    else {
[4704]199        cd->summary.frameOffset = 0;
[4687]200    }
[4732]201#ifdef CARRY_DEBUG
[4713]202    if (cd->ifDepth == 0) cd->dumpCarryData(cerr);
203#endif
[4704]204    return cd->scopeCarryDataSize;
[4687]205}
206
207
[4670]208/* Entering and leaving blocks. */
[4644]209
[4670]210void CarryManager::enterScope(PabloBlock * blk) {
211   
212    mCurrentScope = blk;
[4687]213    mCarryInfo = mCarryInfoVector[blk->getScopeIndex()];
[4691]214    mCurrentFrameIndex += mCarryInfo->getFrameIndex();
[4704]215    //std::cerr << "enterScope:  blk->getScopeIndex() = " << blk->getScopeIndex() << ", mCurrentFrameIndex = " << mCurrentFrameIndex << std::endl;
[4670]216}
217
218void CarryManager::leaveScope() {
[4691]219    mCurrentFrameIndex -= mCarryInfo->getFrameIndex();
[4703]220    if (mCurrentScope != mPabloRoot) {
221        mCurrentScope = mCurrentScope->getParent();
222        mCarryInfo = mCarryInfoVector[mCurrentScope->getScopeIndex()];
223    }
[4691]224    //std::cerr << "leaveScope:  mCurrentFrameIndex = " << mCurrentFrameIndex << std::endl;
[4670]225}
226
[4694]227
228/* Helper routines */
229
[4704]230unsigned CarryManager::absPosition(unsigned frameOffset, unsigned relPos) {
231    return mCurrentFrameIndex + frameOffset + relPos;
[4697]232}
[4694]233
[4697]234
235unsigned CarryManager::carryOpPosition(unsigned localIndex) {
[4704]236    //std::cerr << "carryOpPosition: addWithCarry.frameOffset = " << mCarryInfo->addWithCarry.frameOffset << ", localIndex = " <<localIndex << std::endl;
237    return absPosition(mCarryInfo->addWithCarry.frameOffset, localIndex);
[4697]238}
239
240unsigned CarryManager::advance1Position(unsigned localIndex) {
[4704]241    //std::cerr << "unsigned CarryManager::advance1Position: advance1.frameOffset = " << mCarryInfo->advance1.frameOffset << ", localIndex = " <<localIndex << std::endl;
242    return absPosition(mCarryInfo->advance1.frameOffset, localIndex);
[4697]243}
244
245unsigned CarryManager::shortAdvancePosition(unsigned localIndex) {
[4704]246    return absPosition(mCarryInfo->shortAdvance.frameOffset, localIndex);
[4697]247}
248
[4698]249unsigned CarryManager::longAdvanceBitBlockPosition(unsigned localIndex) {
[4715]250    return (mCurrentFrameIndex + mCarryInfo->longAdvance.frameOffset) / mITEMS_PER_PACK + localIndex;
[4697]251}
[4700]252   
253unsigned CarryManager::localBasePack() {
[4715]254    return (mCurrentFrameIndex + mCarryInfo->shortAdvance.frameOffset) / mITEMS_PER_PACK;
[4700]255}
256   
257unsigned CarryManager::scopeBasePack() {
[4715]258    return mCurrentFrameIndex / mITEMS_PER_PACK;
[4700]259}
260   
[4697]261
[4700]262
[4697]263unsigned CarryManager::summaryPosition() {
[4704]264    return absPosition(mCarryInfo->summary.frameOffset, 0);
[4697]265}
266
[4710]267
268unsigned CarryManager::summaryPackIndex() {
[4715]269    return summaryPosition()/mITEMS_PER_PACK;
[4710]270}
271
[4697]272unsigned CarryManager::summaryBits() {
[4715]273    if (mCarryInfo->scopeCarryDataSize > mITEMS_PER_PACK) return mPACK_SIZE;
[4704]274    else return mCarryInfo->scopeCarryDataSize;
[4697]275}
276
277
278
[4694]279Value * CarryManager::getCarryPack(unsigned packIndex) {
280    if (mCarryInPack[packIndex] == nullptr) {
[4698]281        Value * packPtr = mBuilder->CreateGEP(mCarryPackBasePtr, mBuilder->getInt64(packIndex));
[4710]282        // Save the computed pointer - so that it can be used in storeCarryPack.
[4694]283        mCarryPackPtr[packIndex] = packPtr;
[4715]284        mCarryInPack[packIndex] = mBuilder->CreateAlignedLoad(packPtr, mPACK_SIZE/8);
[4694]285    }
286    return mCarryInPack[packIndex];
287}
[4837]288   
[4696]289void CarryManager::storeCarryPack(unsigned packIndex) {
[4837]290    mBuilder->CreateAlignedStore(mPackBuilder->bitCast(mCarryOutPack[packIndex]), mCarryPackPtr[packIndex], mPACK_SIZE/8);
[4694]291}
292
[4708]293   
[4707]294/* maskSelectBitRange selects the bits of a pack from lo_bit through
[4708]295   lo_bit + bitCount - 1, setting all other bits to zero.  */
296   
[4707]297Value * CarryManager::maskSelectBitRange(Value * pack, unsigned lo_bit, unsigned bitCount) {
[4715]298    if (bitCount == mPACK_SIZE) {
[4707]299        assert(lo_bit == 0);
300        return pack;
[4694]301    }
[4709]302    uint64_t mask = ((((uint64_t) 1) << bitCount) - 1) << lo_bit;
[4837]303    return mPackBuilder->simd_and(pack, mBuilder->getInt64(mask));
[4694]304}
[4708]305   
[4707]306Value * CarryManager::getCarryInBits(unsigned carryBitPos, unsigned carryBitCount) {
[4715]307    unsigned packIndex = carryBitPos / mPACK_SIZE;
308    unsigned packOffset = carryBitPos % mPACK_SIZE;
[4707]309    Value * selected = maskSelectBitRange(getCarryPack(packIndex), packOffset, carryBitCount);
310    if (packOffset == 0) return selected;
311    return mBuilder->CreateLShr(selected, packOffset);
[4704]312}
313
[4707]314void CarryManager::extractAndSaveCarryOutBits(Value * bitblock, unsigned carryBit_pos, unsigned carryBitCount) {
[4715]315    unsigned packIndex = carryBit_pos / mPACK_SIZE;
316    unsigned packOffset = carryBit_pos % mPACK_SIZE;
317    unsigned rshift = mPACK_SIZE - packOffset - carryBitCount;
[4709]318    uint64_t mask = ((((uint64_t) 1) << carryBitCount) - 1)  << packOffset;
319    //std::cerr << "extractAndSaveCarryOutBits: packIndex =" << packIndex << ", packOffset = " << packOffset << ", mask = " << mask << std::endl;
[4827]320    Value * field = iBuilder->mvmd_extract(mPACK_SIZE, bitblock, mBITBLOCK_WIDTH/mPACK_SIZE - 1);
[4707]321    //Value * field = maskSelectBitRange(field, PACK_SIZE - carryBitCount, carryBitCount);
322    if (rshift != 0) {
323        field = mBuilder->CreateLShr(field, mBuilder->getInt64(rshift));
324    }
325    if (packOffset != 0) {
[4709]326        field = mBuilder->CreateAnd(field, mBuilder->getInt64(mask));
[4707]327    }
328    if (mCarryOutPack[packIndex] == nullptr) {
329        mCarryOutPack[packIndex] = field;
330    }
331    else {
[4837]332        mCarryOutPack[packIndex] = mPackBuilder->simd_or(mCarryOutPack[packIndex], field);
[4707]333    }
[4694]334}
[4704]335
[4707]336Value * CarryManager::pack2bitblock(Value * pack) {
[4837]337    return iBuilder->bitCast(mBuilder->CreateZExt(pack, mBuilder->getIntNTy(mBITBLOCK_WIDTH)));
[4707]338}
339   
[4696]340   
341/* Methods for getting and setting individual carry values. */
342   
[4670]343Value * CarryManager::getCarryOpCarryIn(int localIndex) {
[4697]344    unsigned posn = carryOpPosition(localIndex);
[4715]345    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
346        return pack2bitblock(getCarryInBits(posn, 1));
347    }
348    else {
349        return getCarryPack(posn);
350    }
[4644]351}
[4705]352
// LONGADD selects the alternative code paths in setCarryOpCarryOut and
// addCarryInCarryOut; it is defined only when BLOCK_SIZE is 256.
[4881]353#if (BLOCK_SIZE==256)
354#define LONGADD 1
355#endif
356
[4696]357   
[4703]358void CarryManager::setCarryOpCarryOut(unsigned localIndex, Value * carry_out_strm) {
[4697]359    unsigned posn = carryOpPosition(localIndex);
[4715]360    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
361        extractAndSaveCarryOutBits(carry_out_strm, posn, 1);
[4647]362    }
[4715]363    else {
[4881]364#ifndef LONGADD
[4827]365        Value * carry_bit = mBuilder->CreateLShr(mBuilder->CreateBitCast(carry_out_strm, mBuilder->getIntNTy(mBITBLOCK_WIDTH)), mBITBLOCK_WIDTH-1);
[4715]366        mCarryOutPack[posn] = mBuilder->CreateBitCast(carry_bit, mBitBlockType);
[4881]367#else
368        mCarryOutPack[posn] = carry_out_strm;
369#endif
[4715]370        if (mCarryInfo->getWhileDepth() == 0) {
371            storeCarryPack(posn);
372        }
373    }
[4644]374}
375
/* Emits code adding e1 and e2 together with the carry-in of the carry
   operation at localIndex, records the carry-out via setCarryOpCarryOut and
   returns the sum.  Three code paths exist:
     - bitblock width 128: 64-bit field additions with an explicit carry
       from the field sums fed back in;
     - other widths, LONGADD undefined: a single full-width addition;
     - other widths, LONGADD defined: 64-bit field additions combined
       through sign masks (the parallel long addition path). */
[4708]376Value * CarryManager::addCarryInCarryOut(int localIndex, Value* e1, Value* e2) {
[4828]377    if (mBITBLOCK_WIDTH == 128) {
378        Value * carryq_value = getCarryOpCarryIn(localIndex);
379        //calculate carry through logical ops
[4837]380        Value* carrygen = iBuilder->simd_and(e1, e2);
381        Value* carryprop = iBuilder->simd_or(e1, e2);
        // Field-wise sums; the carry-in is added as a second field-wise add.
[4828]382        Value* digitsum = iBuilder->simd_add(64, e1, e2);
383        Value* partial = iBuilder->simd_add(64, digitsum, carryq_value);
        // Carry out of each position: generated, or propagated where the sum bit is 0.
[4837]384        Value* digitcarry = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, mBuilder->CreateNot(partial)));
        // NOTE(review): presumably moves the low field's carry-out bit to bit 0 of the high field - confirm simd_slli semantics.
[4828]385        Value* mid_carry_in = iBuilder->simd_slli(128, mBuilder->CreateLShr(digitcarry, 63), 64);
386        Value* sum = iBuilder->simd_add(64, partial, mBuilder->CreateBitCast(mid_carry_in, mBitBlockType));
[4837]387        Value* carry_out_strm = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, mBuilder->CreateNot(sum)));
[4828]388        setCarryOpCarryOut(localIndex, carry_out_strm);
389        return sum;
390    }
391    else {
[4881]392#ifndef LONGADD
        // Full-width addition: e1 + e2 + carry-in in one mBITBLOCK_WIDTH-wide field.
[4828]393        Value * carryq_value = getCarryOpCarryIn(localIndex);
[4837]394        Value* carrygen = iBuilder->simd_and(e1, e2);
395        Value* carryprop = iBuilder->simd_or(e1, e2);
[4828]396        Value * sum = iBuilder->simd_add(mBITBLOCK_WIDTH, iBuilder->simd_add(mBITBLOCK_WIDTH, e1, e2), carryq_value);
[4837]397        Value* carry_out_strm = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, mBuilder->CreateNot(sum)));
[4828]398        setCarryOpCarryOut(localIndex, carry_out_strm);
399        return sum;
[4881]400#else
        // Long addition: the carry-in is read from 32-bit field 0 of the carry
        // pack, and the carry-out is written back to the same field.
401        Value * carryq_value = getCarryOpCarryIn(localIndex);
402        Value * carryin = iBuilder->mvmd_extract(32, carryq_value, 0);
403        Value * carrygen = iBuilder->simd_and(e1, e2);
404        Value * carryprop = iBuilder->simd_or(e1, e2);
405        Value * digitsum = iBuilder->simd_add(64, e1, e2);
406        Value * digitcarry = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, mBuilder->CreateNot(digitsum)));
        // One mask bit per 64-bit field: fields that produced a carry ...
407        Value * carryMask = iBuilder->hsimd_signmask(64, digitcarry);
        // ... doubled (shifted up by one field) and combined with the carry-in.
408        Value * carryMask2 = mBuilder->CreateOr(mBuilder->CreateAdd(carryMask, carryMask), carryin);
        // Fields whose sum is all ones are marked in bubbleMask.
409        Value * bubble = iBuilder->simd_eq(64, digitsum, iBuilder->allOnes());
410        Value * bubbleMask = iBuilder->hsimd_signmask(64, bubble);
        // NOTE(review): presumably the fields that must be incremented, with carries rippling through all-ones fields - confirm.
411        Value * incrementMask = mBuilder->CreateXor(mBuilder->CreateAdd(bubbleMask, carryMask2), bubbleMask);
412        Value * increments = iBuilder->esimd_bitspread(64,incrementMask);
413        Value * sum = iBuilder->simd_add(64, digitsum, increments);
        // The mask bit just above the last field becomes the carry-out.
414        Value * carry_out_strm = iBuilder->mvmd_insert(32, iBuilder->allZeroes(), mBuilder->CreateLShr(incrementMask, iBuilder->getBitBlockWidth()/64), 0);
415        setCarryOpCarryOut(localIndex, carry_out_strm);
416        return sum;
417#endif
[4828]418    }
[4708]419}
420
421
[4750]422Value * CarryManager::advanceCarryInCarryOut(int localIndex, unsigned shift_amount, Value * strm) {
[4654]423    if (shift_amount == 1) {
[4670]424        return unitAdvanceCarryInCarryOut(localIndex, strm);
[4647]425    }
[4654]426    else if (shift_amount < LongAdvanceBase) {
[4670]427        return shortAdvanceCarryInCarryOut(localIndex, shift_amount, strm);
[4654]428    }
429    else {
[4670]430        return longAdvanceCarryInCarryOut(localIndex, shift_amount, strm);
[4654]431    }
[4644]432}
433
// Field width, in bits, passed to the mvmd_dslli / simd_srli / simd_slli
// calls made by the unit and short advance routines.
[4845]434#define DSSLI_FIELDWIDTH 64
435
[4670]436Value * CarryManager::unitAdvanceCarryInCarryOut(int localIndex, Value * strm) {
[4697]437    unsigned posn = advance1Position(localIndex);
[4715]438    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
439        extractAndSaveCarryOutBits(strm, posn, 1);
[4827]440        Value* carry_longint = mBuilder->CreateZExt(getCarryInBits(posn, 1), mBuilder->getIntNTy(mBITBLOCK_WIDTH));
441        Value* strm_longint = mBuilder->CreateBitCast(strm, mBuilder->getIntNTy(mBITBLOCK_WIDTH));
[4715]442        Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, 1), carry_longint);
443        Value* result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
444        return result_value;
445    }
446    mCarryOutPack[posn] = strm;
[4697]447    Value * carry_in = getCarryPack(posn);
[4670]448    if (mCarryInfo->getWhileDepth() == 0) {
[4697]449        storeCarryPack(posn);
[4647]450    }
[4845]451    Value * ahead = iBuilder->mvmd_dslli(DSSLI_FIELDWIDTH, strm, carry_in, iBuilder->getBitBlockWidth()/DSSLI_FIELDWIDTH -1);
452    return iBuilder->simd_or(iBuilder->simd_srli(DSSLI_FIELDWIDTH, ahead, DSSLI_FIELDWIDTH-1), iBuilder->simd_slli(DSSLI_FIELDWIDTH, strm, 1));
[4644]453}
454
[4750]455Value * CarryManager::shortAdvanceCarryInCarryOut(int localIndex, unsigned shift_amount, Value * strm) {
[4697]456    unsigned posn = shortAdvancePosition(localIndex);
[4715]457    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
458        extractAndSaveCarryOutBits(strm, posn, shift_amount);
459        //std::cerr << "shortAdvanceCarryInCarryOut: posn = " << posn << ", shift_amount = " << shift_amount << std::endl;
[4827]460        Value* carry_longint = mBuilder->CreateZExt(getCarryInBits(posn, shift_amount), mBuilder->getIntNTy(mBITBLOCK_WIDTH));
461        Value* strm_longint = mBuilder->CreateBitCast(strm, mBuilder->getIntNTy(mBITBLOCK_WIDTH));
[4715]462        Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, shift_amount), carry_longint);
463        Value* result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
464        return result_value;
465    }
466    mCarryOutPack[posn] = strm;
[4697]467    Value * carry_in = getCarryPack(posn);
[4670]468    if (mCarryInfo->getWhileDepth() == 0) {
[4697]469        storeCarryPack(posn);
[4647]470    }
[4845]471    // Use a single whole-byte shift, if possible.
472    if (shift_amount % 8 == 0) {
473        return iBuilder->mvmd_dslli(8, strm, carry_in, iBuilder->getBitBlockWidth()/8 - shift_amount/8);
474    }
475    else if (shift_amount < DSSLI_FIELDWIDTH) {
476        Value * ahead = iBuilder->mvmd_dslli(DSSLI_FIELDWIDTH, strm, carry_in, iBuilder->getBitBlockWidth()/DSSLI_FIELDWIDTH - 1);
477        return iBuilder->simd_or(iBuilder->simd_srli(DSSLI_FIELDWIDTH, ahead, DSSLI_FIELDWIDTH-shift_amount), iBuilder->simd_slli(DSSLI_FIELDWIDTH, strm, shift_amount));
478    }
[4827]479    Value* advanceq_longint = mBuilder->CreateBitCast(carry_in, mBuilder->getIntNTy(mBITBLOCK_WIDTH));
480    Value* strm_longint = mBuilder->CreateBitCast(strm, mBuilder->getIntNTy(mBITBLOCK_WIDTH));
481    Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, shift_amount), mBuilder->CreateLShr(advanceq_longint, mBITBLOCK_WIDTH - shift_amount), "advance");
[4654]482    return mBuilder->CreateBitCast(adv_longint, mBitBlockType);
[4644]483}
[4654]484   
[4647]485
[4654]486/*  currently defined in carry_data.h
487 
[4647]488 static unsigned power2ceil (unsigned v) {
489 unsigned ceil = 1;
490 while (ceil < v) ceil *= 2;
491 return ceil;
492 }
493 
494 unsigned longAdvanceEntries(unsigned shift_amount) const {
[4827]495 return (shift_amount + mBITBLOCK_WIDTH - 1)/mBITBLOCK_WIDTH;
[4647]496 }
497 
498 unsigned longAdvanceBufferSize(unsigned shift_amount)  const {
499 return power2ceil(longAdvanceEntries(shift_amount));
500 }
501 */
[4797]502
[4654]503   
/* Emits code for an advance by shift_amount positions using bitblock-sized
   entries of the carry data area, starting at the index returned by
   longAdvanceBitBlockPosition(localIndex).  carry_out is the block stored
   for use by later blocks; the returned value is the advanced stream.
   Shifts of at most one bitblock use a single entry; larger shifts use a
   circular buffer indexed by the block number. */
[4750]504Value * CarryManager::longAdvanceCarryInCarryOut(int localIndex, unsigned shift_amount, Value * carry_out) {
[4698]505    unsigned carryDataIndex = longAdvanceBitBlockPosition(localIndex);
[4670]506    Value * advBaseIndex = mBuilder->getInt64(carryDataIndex);
[4827]507    if (shift_amount <= mBITBLOCK_WIDTH) {
[4647]508        // special case using a single buffer entry and the carry_out value.
[4698]509        Value * advanceDataPtr = mBuilder->CreateGEP(mCarryBitBlockPtr, advBaseIndex);
        // Load the previously stored block before overwriting the entry with carry_out.
[4827]510        Value * carry_block0 = mBuilder->CreateAlignedLoad(advanceDataPtr, mBITBLOCK_WIDTH/8);
511        mBuilder->CreateAlignedStore(carry_out, advanceDataPtr, mBITBLOCK_WIDTH/8);
[4647]512        /* Very special case - no combine */
[4827]513        if (shift_amount == mBITBLOCK_WIDTH) return carry_block0;
        // Combine the high bits of the stored block with the shifted current block.
514        Value* block0_shr = mBuilder->CreateLShr(mBuilder->CreateBitCast(carry_block0, mBuilder->getIntNTy(mBITBLOCK_WIDTH)), mBITBLOCK_WIDTH - shift_amount);
515        Value* block1_shl = mBuilder->CreateShl(mBuilder->CreateBitCast(carry_out, mBuilder->getIntNTy(mBITBLOCK_WIDTH)), shift_amount);
[4647]516        return mBuilder->CreateBitCast(mBuilder->CreateOr(block1_shl, block0_shr), mBitBlockType);
517    }
518    // We need a buffer of at least two elements for storing the advance data.
[4827]519    const unsigned block_shift = shift_amount % mBITBLOCK_WIDTH;
[4670]520    const unsigned advanceEntries = mCarryInfo->longAdvanceEntries(shift_amount);
521    const unsigned bufsize = mCarryInfo->longAdvanceBufferSize(shift_amount);
[4644]522    Value * indexMask = mBuilder->getInt64(bufsize - 1);  // A mask to implement circular buffer indexing
    // Buffer slots are selected at run time from the block number: the load
    // slot lags the store slot by advanceEntries blocks.
523    Value * loadIndex0 = mBuilder->CreateAdd(mBuilder->CreateAnd(mBuilder->CreateSub(mBlockNo, mBuilder->getInt64(advanceEntries)), indexMask), advBaseIndex);
[4647]524    Value * storeIndex = mBuilder->CreateAdd(mBuilder->CreateAnd(mBlockNo, indexMask), advBaseIndex);
[4827]525    Value * carry_block0 = mBuilder->CreateAlignedLoad(mBuilder->CreateGEP(mCarryBitBlockPtr, loadIndex0), mBITBLOCK_WIDTH/8);
526    // If the long advance is an exact multiple of mBITBLOCK_WIDTH, we simply return the oldest
[4647]527    // block in the long advance carry data area. 
528    if (block_shift == 0) {
[4827]529        mBuilder->CreateAlignedStore(carry_out, mBuilder->CreateGEP(mCarryBitBlockPtr, storeIndex), mBITBLOCK_WIDTH/8);
[4647]530        return carry_block0;
[4644]531    }
532    // Otherwise we need to combine data from the two oldest blocks.
533    Value * loadIndex1 = mBuilder->CreateAdd(mBuilder->CreateAnd(mBuilder->CreateSub(mBlockNo, mBuilder->getInt64(advanceEntries-1)), indexMask), advBaseIndex);
[4827]534    Value * carry_block1 = mBuilder->CreateAlignedLoad(mBuilder->CreateGEP(mCarryBitBlockPtr, loadIndex1), mBITBLOCK_WIDTH/8);
535    Value* block0_shr = mBuilder->CreateLShr(mBuilder->CreateBitCast(carry_block0, mBuilder->getIntNTy(mBITBLOCK_WIDTH)), mBITBLOCK_WIDTH - block_shift);
536    Value* block1_shl = mBuilder->CreateShl(mBuilder->CreateBitCast(carry_block1, mBuilder->getIntNTy(mBITBLOCK_WIDTH)), block_shift);
    // The store is emitted only after both loads have been emitted.
537    mBuilder->CreateAlignedStore(carry_out, mBuilder->CreateGEP(mCarryBitBlockPtr, storeIndex), mBITBLOCK_WIDTH/8);
[4644]538    return mBuilder->CreateBitCast(mBuilder->CreateOr(block1_shl, block0_shr), mBitBlockType);
539}
[4647]540   
[4644]541
542/* Methods for getting and setting carry summary values */
543   
// Forwards to blockHasCarries() on the carry data record of the current scope.
[4670]544bool CarryManager::blockHasCarries(){
545    return mCarryInfo->blockHasCarries();
[4644]546} 
547
[4670]548
[4838]549Value * CarryManager::generateBitBlockOrSummaryTest(Value * bitblock) {
550    Value * test_expr = bitblock;
551    if (mCarryInfo->blockHasCarries()) {
552        Value * summary_pack = getCarryPack(summaryPackIndex());
553        if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
554            Value * summary_bits = maskSelectBitRange(summary_pack, summaryPosition() % mPACK_SIZE, summaryBits());
555            test_expr = iBuilder->simd_or(test_expr, mBuilder->CreateZExt(summary_bits, mBuilder->getIntNTy(mBITBLOCK_WIDTH)));
556        }
557        else {
558            test_expr = iBuilder->simd_or(test_expr, summary_pack);
559        }
[4715]560    }
[4838]561    return iBuilder->bitblock_any(test_expr);
[4644]562}
563
[4704]564void CarryManager::initializeCarryDataAtIfEntry() {
[4721]565    if (blockHasCarries()) {
566        if (mCarryOutPack[scopeBasePack()] == nullptr) {
[4821]567            mCarryInfo->ifEntryPack = mPackBuilder->allZeroes();
[4721]568        }
569        else {
570            mCarryInfo->ifEntryPack = mCarryOutPack[scopeBasePack()];
571        }
[4703]572    }
[4704]573}
[4703]574   
[4704]575void CarryManager::buildCarryDataPhisAfterIfBody(BasicBlock * ifEntryBlock, BasicBlock * ifBodyFinalBlock) {
[4813]576    if (mCarryInfo->getWhileDepth() > 0) {
577        // We need to phi out everything for the while carry accumulation process.
578        const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
579        const unsigned currentScopeBase = scopeBasePack();
580        for (unsigned index = currentScopeBase; index < currentScopeBase + scopeCarryPacks; ++index) {
581            PHINode * phi_out = mBuilder->CreatePHI(mCarryPackType, 2);
[4821]582            phi_out->addIncoming(mPackBuilder->allZeroes(),ifEntryBlock);
[4837]583            phi_out->addIncoming(mPackBuilder->bitCast(mCarryOutPack[index]), ifBodyFinalBlock);
[4813]584            mCarryOutPack[index] = phi_out;
585        }
586        return;
587    }
[4704]588    unsigned const ifScopeCarrySize = mCarryInfo->scopeCarryDataSize;
589    if (ifScopeCarrySize == 0) {
590        // No carry data, therefore no phi nodes.
591        return;
[4703]592    }
[4715]593    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
594        if (ifScopeCarrySize <= mPACK_SIZE) {
595            unsigned const ifPackIndex = scopeBasePack();
596            PHINode * ifPack_phi = mBuilder->CreatePHI(mCarryPackType, 2, "ifPack");
597            ifPack_phi->addIncoming(mCarryInfo->ifEntryPack, ifEntryBlock);
[4837]598            ifPack_phi->addIncoming(mPackBuilder->bitCast(mCarryOutPack[ifPackIndex]), ifBodyFinalBlock);
[4715]599            mCarryOutPack[ifPackIndex] = ifPack_phi;
600            return;
601        }
[4704]602    }
[4706]603    if (mCarryInfo->getIfDepth() > 1) {
[4813]604        // Our parent block is also an if.  It needs access to our summary to compute
605        // its own summary.
[4710]606        const unsigned summaryIndex = summaryPackIndex();
[4704]607        PHINode * summary_phi = mBuilder->CreatePHI(mCarryPackType, 2, "summary");
[4821]608        summary_phi->addIncoming(mPackBuilder->allZeroes(), ifEntryBlock);
[4837]609        summary_phi->addIncoming(mPackBuilder->bitCast(mCarryOutPack[summaryIndex]), ifBodyFinalBlock);
[4710]610        mCarryOutPack[summaryIndex] = summary_phi;
[4704]611    }
612}
[4703]613   
[4811]614void CarryManager::addSummaryPhiIfNeeded(BasicBlock * ifEntryBlock, BasicBlock * ifBodyFinalBlock) {
[4676]615    if ((mCarryInfo->getIfDepth() <= 1) || !mCarryInfo->blockHasCarries()){
616        // For ifDepth == 1, the parent does not need a summary as it is not itself within an if.
617        // Therefore, it doesn't need access to this block's summary in building its own.
618        return;
619    }
[4710]620    const unsigned carrySummaryIndex = summaryPackIndex();
[4700]621    PHINode * summary_phi = mBuilder->CreatePHI(mCarryPackType, 2, "summary");
[4821]622    summary_phi->addIncoming(mPackBuilder->allZeroes(), ifEntryBlock);
[4837]623    summary_phi->addIncoming(mPackBuilder->bitCast(mCarryOutPack[carrySummaryIndex]), ifBodyFinalBlock);
[4696]624    mCarryOutPack[carrySummaryIndex] = summary_phi;
[4644]625}
[4703]626   
[4676]627void CarryManager::generateCarryOutSummaryCodeIfNeeded() {
[4644]628   
[4676]629    if (!mCarryInfo->explicitSummaryRequired()) {
630        // An explicit summary may not be required, if there is a single carry
631        // operation within the block, or the carries are packed and all carry
632        // bits fit within a single pack.
[4644]633        return;
634    }
635   
[4710]636    const unsigned carrySummaryIndex = summaryPackIndex();
[4676]637   
[4821]638    Value * carry_summary = mPackBuilder->allZeroes();
[4670]639    if (mCarryInfo->blockHasLongAdvances()) { // Force if entry
[4821]640        carry_summary = mPackBuilder->allOnes();
[4644]641    }
642    else {
[4750]643        unsigned localCarryIndex = localBasePack();
644        unsigned localCarryPacks = mCarryInfo->getLocalCarryPackCount();
[4644]645        if (localCarryPacks > 0) {
[4696]646            carry_summary = mCarryOutPack[localCarryIndex];
[4750]647            for (unsigned i = 1; i < localCarryPacks; i++) {
[4837]648                carry_summary = mPackBuilder->simd_or(carry_summary, mCarryOutPack[localCarryIndex+i]);
[4644]649            }
[4647]650        }
[4670]651        for (Statement * stmt : *mCurrentScope) {
[4644]652            if (If * innerIf = dyn_cast<If>(stmt)) {
[4870]653                PabloBlock * inner_blk = innerIf->getBody();
[4670]654                enterScope(inner_blk);
655                if (blockHasCarries()) {
[4837]656                  carry_summary = mPackBuilder->simd_or(carry_summary, mCarryOutPack[summaryPackIndex()]);
[4644]657                }
[4670]658                leaveScope();
[4644]659            }
660            else if (While * innerWhile = dyn_cast<While>(stmt)) {
[4870]661                PabloBlock * inner_blk = innerWhile->getBody();
[4670]662                enterScope(inner_blk);
663                if (blockHasCarries()) {
[4837]664                    carry_summary = mPackBuilder->simd_or(carry_summary, mCarryOutPack[summaryPackIndex()]);
[4670]665                }
666                leaveScope();
[4644]667            }
668        }
669    }
670    // Calculation of the carry out summary is complete.   Store it and make it
671    // available in case it must included by parent blocks.
[4696]672    mCarryOutPack[carrySummaryIndex] = carry_summary;
673    storeCarryPack(carrySummaryIndex);
[4644]674}
675
[4670]676void CarryManager::ensureCarriesLoadedRecursive() {
[4696]677    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
[4700]678    const unsigned currentScopeBase = scopeBasePack();
[4670]679    if (mCarryInfo->getWhileDepth() == 1) {
[4700]680        for (auto i = currentScopeBase; i < currentScopeBase + scopeCarryPacks; ++i) {
[4694]681            getCarryPack(i);
[4644]682        }
683    }
684}
685
686
[4670]687void CarryManager::initializeCarryDataPhisAtWhileEntry(BasicBlock * whileEntryBlock) {
[4696]688    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
689    mCarryOutAccumPhis.resize(scopeCarryPacks);
[4644]690#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
[4700]691    const unsigned currentScopeBase = scopeBasePack();
[4696]692    mCarryInPhis.resize(scopeCarryPacks);
693#endif
694    for (unsigned index = 0; index < scopeCarryPacks; ++index) {
695#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
[4700]696        PHINode * phi_in = mBuilder->CreatePHI(mCarryPackType, 2);
697        phi_in->addIncoming(mCarryInPack[currentScopeBase+index], whileEntryBlock);
[4644]698        mCarryInPhis[index] = phi_in;
699#endif
[4700]700        PHINode * phi_out = mBuilder->CreatePHI(mCarryPackType, 2);
[4821]701        phi_out->addIncoming(mPackBuilder->allZeroes(), whileEntryBlock);
[4644]702        mCarryOutAccumPhis[index] = phi_out;
703    }
704}
705
706
[4670]707void CarryManager::extendCarryDataPhisAtWhileBodyFinalBlock(BasicBlock * whileBodyFinalBlock) {
[4696]708    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
[4700]709    const unsigned currentScopeBase = scopeBasePack();
[4696]710    for (unsigned index = 0; index < scopeCarryPacks; ++index) {
[4644]711#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
[4821]712        mCarryInPhis[index]->addIncoming(mPackBuilder->allZeroes(), whileBodyFinalBlock);
[4644]713#endif
714        PHINode * phi = mCarryOutAccumPhis[index];
[4837]715        Value * carryOut = mPackBuilder->simd_or(phi, mCarryOutPack[currentScopeBase+index]);
716        phi->addIncoming(mPackBuilder->bitCast(carryOut), whileBodyFinalBlock);
[4700]717        mCarryOutPack[currentScopeBase+index] = carryOut;
[4644]718    }
719}
720
[4670]721void CarryManager::ensureCarriesStoredRecursive() {
[4696]722    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
[4700]723    const unsigned currentScopeBase = scopeBasePack();
[4670]724    if (mCarryInfo->getWhileDepth() == 1) {
[4700]725        for (auto i = currentScopeBase; i < currentScopeBase + scopeCarryPacks; ++i) {
[4696]726            storeCarryPack(i);
[4644]727        }
728    }
729}
730
[4713]731/* Store all the full carry packs generated locally in this scope or the
732   single full pack for this scope*/
[4710]733void CarryManager::ensureCarriesStoredLocal() {
[4715]734    if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
735        const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
736        if ((scopeCarryPacks > 0) && ((mCurrentFrameIndex % mPACK_SIZE) == 0)) {
737            // We have carry data and we are not in the middle of a pack.
738            // Write out all local packs.
739            auto localCarryIndex = localBasePack();
740            auto localCarryPacks = mCarryInfo->getLocalCarryPackCount();
741            for (auto i = localCarryIndex; i < localCarryIndex + localCarryPacks; i++) {
742                storeCarryPack(i);
743            }
744            if ((localCarryPacks == 0) && (scopeCarryPacks == 1) && (mCarryInfo->nested.entries > 1)) {
745                storeCarryPack(localCarryIndex);
746            }
[4710]747        }
748    }
[4644]749}
750
[4720]751Value * CarryManager::popCount(Value * to_count, unsigned globalIdx) {
752    Value * countPtr = mBuilder->CreateGEP(mPopcountBasePtr, mBuilder->getInt64(globalIdx));
753    Value * countSoFar = mBuilder->CreateAlignedLoad(countPtr, 8);
754    Value * fieldCounts = iBuilder->simd_popcount(64, to_count);
[4827]755    for (int i = 0; i < mBITBLOCK_WIDTH/64; i++) {
[4720]756        countSoFar = mBuilder->CreateAdd(countSoFar, iBuilder->mvmd_extract(64, fieldCounts, i));
757    }
758    mBuilder->CreateAlignedStore(countSoFar, countPtr, 8);
[4837]759    return iBuilder->bitCast(mBuilder->CreateZExt(countSoFar, mBuilder->getIntNTy(mBITBLOCK_WIDTH)));
[4720]760}
[4726]761
[4712]762CarryManager::~CarryManager() {
763    for (auto * cd : mCarryInfoVector) {
764        delete cd;
765    }
[4710]766}
767
[4712]768}
769
Note: See TracBrowser for help on using the repository browser.