Ignore:
Timestamp:
Feb 2, 2016, 4:02:08 PM (4 years ago)
Author:
nmedfort
Message:

Slight optimization for Simplifier; major change to CarryManager to build summary variables whenever a carry operation is performed.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r4922 r4925  
    44 *  icgrep is a trademark of International Characters.
    55 */
    6 
    76
    87#include <include/simd-lib/bitblock.hpp>
     
    1211#include <pablo/carry_manager.h>
    1312#include <pablo/pabloAST.h>
    14 #ifdef CARRY_DEBUG
    15 #include <iostream>
    16 #endif
    1713#include <llvm/Support/CommandLine.h>
    1814#include <llvm/IR/BasicBlock.h>
     
    2016#include <llvm/IR/Function.h>
    2117
    22 static cl::opt<CarryManagerStrategy> Strategy(cl::desc("Choose carry management strategy:"),
    23                                               cl::values(
    24                                                          clEnumVal(BitBlockStrategy, "Unpacked, each carry in a separate bitblock."),
    25                                                          clEnumVal(SequentialFullyPackedStrategy, "Sequential packing, up to 64 carries per pack."),
    26                                                          clEnumValEnd));
    27 
     18#if (BLOCK_SIZE==256)
     19#define USING_LONG_ADD
     20#endif
     21#define DSSLI_FIELDWIDTH 64
    2822
    2923namespace pablo {
    30  
    31 unsigned doScopeCount(PabloBlock * pb) {
     24
     25/** ------------------------------------------------------------------------------------------------------------- *
     26 * @brief doScopeCount
     27 ** ------------------------------------------------------------------------------------------------------------- */
     28static unsigned doScopeCount(const PabloBlock * const pb) {
    3229    unsigned count = 1;
    33 
    34     for (Statement * stmt : *pb) {
    35         if (If * ifStatement = dyn_cast<If>(stmt)) {
    36             count += doScopeCount(ifStatement->getBody());
    37         }
    38         else if (While * whileStatement = dyn_cast<While>(stmt)) {
    39             count += doScopeCount(whileStatement->getBody());
     30    for (const Statement * stmt : *pb) {
     31        if (LLVM_UNLIKELY(isa<If>(stmt))) {
     32            count += doScopeCount(cast<If>(stmt)->getBody());
     33        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
     34            count += doScopeCount(cast<While>(stmt)->getBody());
    4035        }
    4136    }
    4237    return count;
    43 
    44 }
    45    
     38}
     39
     40/** ------------------------------------------------------------------------------------------------------------- *
     41 * @brief generateCarryDataInitializer
     42 ** ------------------------------------------------------------------------------------------------------------- */
    4643void CarryManager::generateCarryDataInitializer(Module * m) {
    4744    FunctionType * functionType = FunctionType::get(Type::getVoidTy(m->getContext()), std::vector<Type *>({}), false);
     
    5451    f->setCallingConv(CallingConv::C);
    5552    f->setAttributes(AttrSet);
    56     llvm::IRBuilderBase::InsertPoint ip = iBuilder->saveIP();
    57     iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry1", f,0));
    58     iBuilder->CreateMemSet(mCarryBitBlockPtr, iBuilder->getInt8(0), mTotalCarryDataBitBlocks * mBITBLOCK_WIDTH/8, 4);
     53    const auto ip = iBuilder->saveIP();
     54    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry1", f, 0));
     55    iBuilder->CreateMemSet(mCarryBitBlockPtr, iBuilder->getInt8(0), mTotalCarryDataBitBlocks * mBitBlockWidth/8, 4);
    5956    ReturnInst::Create(m->getContext(), iBuilder->GetInsertBlock());
    6057    iBuilder->restoreIP(ip);
    6158}
    62    
    63    
    64 
     59
     60/** ------------------------------------------------------------------------------------------------------------- *
     61 * @brief initialize
     62 ** ------------------------------------------------------------------------------------------------------------- */
    6563void CarryManager::initialize(Module * m, PabloBlock * pb) {
    66     mPabloRoot = pb;
    67     unsigned scopeCount = doScopeCount(pb);
    68     mCarryInfoVector.resize(scopeCount);
    69     if (Strategy == SequentialFullyPackedStrategy) {
    70         mPACK_SIZE = 64;
    71         mITEMS_PER_PACK = 64;
    72         mCarryPackType = iBuilder->getIntNTy(mPACK_SIZE);
    73     }
    74     else {
    75         mPACK_SIZE = mBITBLOCK_WIDTH;
    76         mITEMS_PER_PACK = 1;
    77         mCarryPackType = mBitBlockType;
    78     }
    79     unsigned totalCarryDataSize = enumerate(pb, 0, 0);
    80    
    81     unsigned totalPackCount = (totalCarryDataSize + mITEMS_PER_PACK - 1)/mITEMS_PER_PACK;
    82 
    83     mCarryPackPtr.resize(totalPackCount, nullptr);
    84     mCarryInPack.resize(totalPackCount, nullptr);
    85     mCarryOutPack.resize(totalPackCount, nullptr);
    86 
    87     if (Strategy == SequentialFullyPackedStrategy) {
    88         mTotalCarryDataBitBlocks = (totalCarryDataSize + mBITBLOCK_WIDTH - 1)/mBITBLOCK_WIDTH;       
    89     }
    90     else {
    91         mTotalCarryDataBitBlocks = totalCarryDataSize;
    92     }
     64    mRootScope = pb;
     65    mCarryInfoVector.resize(doScopeCount(pb));
     66    mCarryPackType = mBitBlockType;
     67
     68    const unsigned totalCarryDataSize = enumerate(pb, 0, 0);
     69
     70    mCarryPackPtr.resize(totalCarryDataSize, nullptr);
     71    mCarryInPack.resize(totalCarryDataSize, nullptr);
     72    mCarryOutPack.resize(totalCarryDataSize, nullptr);
     73
     74    mTotalCarryDataBitBlocks = totalCarryDataSize;
    9375   
    9476    ArrayType* cdArrayTy = ArrayType::get(mBitBlockType, mTotalCarryDataBitBlocks);
    9577    GlobalVariable* cdArray = new GlobalVariable(*m, cdArrayTy, /*isConstant=*/false, GlobalValue::CommonLinkage, /*Initializer=*/0, "process_block_carry_data");
    96     cdArray->setAlignment(mBITBLOCK_WIDTH/8);
     78    cdArray->setAlignment(mBitBlockWidth / 8);
    9779    ConstantAggregateZero* cdInitData = ConstantAggregateZero::get(cdArrayTy);
    9880    cdArray->setInitializer(cdInitData);
     
    10789        ArrayType* pcArrayTy = ArrayType::get(iBuilder->getIntNTy(64), mPabloCountCount);
    10890        GlobalVariable* pcArray = new GlobalVariable(*m, pcArrayTy, /*isConstant=*/false, GlobalValue::CommonLinkage, 0, "popcount_data");
    109         cdArray->setAlignment(mBITBLOCK_WIDTH/8);
     91        cdArray->setAlignment(mBitBlockWidth/8);
    11092        ConstantAggregateZero* pcInitData = ConstantAggregateZero::get(pcArrayTy);
    11193        pcArray->setInitializer(pcInitData);
     
    119101    mBlockNo = iBuilder->CreateLoad(mBlockNoPtr);
    120102    /*  Set the current scope to PabloRoot */
    121     mCurrentScope = mPabloRoot;
     103    mCurrentScope = mRootScope;
    122104    mCurrentFrameIndex = 0;
    123105    mCarryInfo = mCarryInfoVector[0];
    124 }
    125    
    126 void CarryManager::generateBlockNoIncrement() {
    127     iBuilder->CreateStore(iBuilder->CreateAdd(mBlockNo, iBuilder->getInt64(1)), mBlockNoPtr);
    128 }
    129 
    130 Value * CarryManager::getBlockNoPtr() {
    131     return mBlockNoPtr;
    132 }
    133 
    134 
    135 unsigned CarryManager::enumerate(PabloBlock * blk, unsigned ifDepth, unsigned whileDepth) {
    136 #ifdef CARRY_DEBUG
    137     llvm::raw_os_ostream cerr(std::cerr);
    138 #endif
    139     unsigned idx = blk->getScopeIndex();
    140     PabloBlockCarryData * cd = new PabloBlockCarryData(blk, mPACK_SIZE, mITEMS_PER_PACK);
    141     mCarryInfoVector[idx] = cd;
    142 
    143     cd->setIfDepth(ifDepth);
    144     cd->setWhileDepth(whileDepth);
    145     unsigned nestedOffset = cd->nested.frameOffset;
    146  
    147     for (Statement * stmt : *blk) {
    148         if (Count * c = dyn_cast<Count>(stmt)) {
    149             c->setGlobalCountIndex(mPabloCountCount);
    150             mPabloCountCount++;
    151         }
    152         else if (If * ifStatement = dyn_cast<If>(stmt)) {
    153             const unsigned ifCarryDataBits = enumerate(ifStatement->getBody(), ifDepth+1, whileDepth);
    154             PabloBlockCarryData * nestedBlockData = mCarryInfoVector[ifStatement->getBody()->getScopeIndex()];
    155             if (mITEMS_PER_PACK == mPACK_SIZE) {  // PACKING
    156                 if (cd->roomInFinalPack(nestedOffset) < ifCarryDataBits) {
    157                     nestedOffset = alignCeiling(nestedOffset, mPACK_SIZE);
    158                 }
    159             }
    160             nestedBlockData->setFramePosition(nestedOffset);
    161 
    162             nestedOffset += ifCarryDataBits;
    163             if (cd->maxNestingDepth <= nestedBlockData->maxNestingDepth) cd->maxNestingDepth = nestedBlockData->maxNestingDepth + 1;
    164             cd->nested.entries++;
    165 #ifdef CARRY_DEBUG
    166             nestedBlockData->dumpCarryData(cerr);
    167 #endif
    168         }
    169         else if (While * whileStatement = dyn_cast<While>(stmt)) {
    170             const unsigned whileCarryDataBits = enumerate(whileStatement->getBody(), ifDepth, whileDepth+1);
    171             PabloBlockCarryData * nestedBlockData = mCarryInfoVector[whileStatement->getBody()->getScopeIndex()];
    172             //if (whileStatement->isMultiCarry()) whileCarryDataBits *= whileStatement->getMaxIterations();
    173             if (mITEMS_PER_PACK == mPACK_SIZE) {  // PACKING
    174                 if (cd->roomInFinalPack(nestedOffset) < whileCarryDataBits) {
    175                     nestedOffset = alignCeiling(nestedOffset, mPACK_SIZE);
    176                 }
    177             }
    178             nestedBlockData->setFramePosition(nestedOffset);
    179             nestedOffset += whileCarryDataBits;
    180             if (cd->maxNestingDepth <= nestedBlockData->maxNestingDepth) cd->maxNestingDepth = nestedBlockData->maxNestingDepth + 1;
    181             cd->nested.entries++;
    182 #ifdef CARRY_DEBUG
    183             nestedBlockData->dumpCarryData(cerr);
    184 #endif
    185         }
    186     }
    187    
    188     cd->scopeCarryDataSize = nestedOffset;
    189    
    190     if (cd->explicitSummaryRequired()) {
    191         // Need extra space for the summary variable, always the last
    192         // entry within an if block.
    193         if (mITEMS_PER_PACK == mPACK_SIZE) {  // PACKING
    194             cd->scopeCarryDataSize = alignCeiling(cd->scopeCarryDataSize, mPACK_SIZE);
    195         }
    196         cd->summary.frameOffset = cd->scopeCarryDataSize;
    197         cd->scopeCarryDataSize += mITEMS_PER_PACK;  //  computed summary is a full pack.
    198     }
    199     else {
    200         cd->summary.frameOffset = 0;
    201     }
    202 #ifdef CARRY_DEBUG
    203     if (cd->ifDepth == 0) cd->dumpCarryData(cerr);
    204 #endif
    205     return cd->scopeCarryDataSize;
    206 }
    207 
    208 
    209 /* Entering and leaving blocks. */
    210 
    211 void CarryManager::enterScope(PabloBlock * blk) {
    212    
    213     mCurrentScope = blk;
    214     mCarryInfo = mCarryInfoVector[blk->getScopeIndex()];
     106    mCarryOutPack[summaryPack()] = Constant::getNullValue(mCarryPackType);
     107}
     108
     109/** ------------------------------------------------------------------------------------------------------------- *
     110 * @brief enterScope
     111 ** ------------------------------------------------------------------------------------------------------------- */
     112void CarryManager::enterScope(PabloBlock * const scope) {
     113    Value * summaryCarry = mCarryOutPack[summaryPack()];
     114    mCarrySummary.push_back(summaryCarry);
     115    mCurrentScope = scope;
     116    mCarryInfo = mCarryInfoVector[scope->getScopeIndex()];
    215117    mCurrentFrameIndex += mCarryInfo->getFrameIndex();
    216     //std::cerr << "enterScope:  blk->getScopeIndex() = " << blk->getScopeIndex() << ", mCurrentFrameIndex = " << mCurrentFrameIndex << std::endl;
    217 }
    218 
     118    mCarryOutPack[summaryPack()] = Constant::getNullValue(mCarryPackType);
     119}
     120
     121/** ------------------------------------------------------------------------------------------------------------- *
     122 * @brief leaveScope
     123 ** ------------------------------------------------------------------------------------------------------------- */
    219124void CarryManager::leaveScope() {
     125    Value * summaryCarry = mCarryOutPack[summaryPack()];
     126    assert (mCurrentScope != mRootScope);
    220127    mCurrentFrameIndex -= mCarryInfo->getFrameIndex();
    221     if (mCurrentScope != mPabloRoot) {
    222         mCurrentScope = mCurrentScope->getParent();
    223         mCarryInfo = mCarryInfoVector[mCurrentScope->getScopeIndex()];
    224     }
    225     //std::cerr << "leaveScope:  mCurrentFrameIndex = " << mCurrentFrameIndex << std::endl;
    226 }
    227 
    228 
    229 /* Helper routines */
    230 
    231 unsigned CarryManager::absPosition(unsigned frameOffset, unsigned relPos) {
    232     return mCurrentFrameIndex + frameOffset + relPos;
    233 }
    234 
    235 
    236 unsigned CarryManager::carryOpPosition(unsigned localIndex) {
    237     //std::cerr << "carryOpPosition: addWithCarry.frameOffset = " << mCarryInfo->addWithCarry.frameOffset << ", localIndex = " <<localIndex << std::endl;
    238     return absPosition(mCarryInfo->addWithCarry.frameOffset, localIndex);
    239 }
    240 
    241 unsigned CarryManager::advance1Position(unsigned localIndex) {
    242     //std::cerr << "unsigned CarryManager::advance1Position: advance1.frameOffset = " << mCarryInfo->advance1.frameOffset << ", localIndex = " <<localIndex << std::endl;
    243     return absPosition(mCarryInfo->advance1.frameOffset, localIndex);
    244 }
    245 
    246 unsigned CarryManager::shortAdvancePosition(unsigned localIndex) {
    247     return absPosition(mCarryInfo->shortAdvance.frameOffset, localIndex);
    248 }
    249 
    250 unsigned CarryManager::longAdvanceBitBlockPosition(unsigned localIndex) {
    251     return (mCurrentFrameIndex + mCarryInfo->longAdvance.frameOffset) / mITEMS_PER_PACK + localIndex;
    252 }
    253    
    254 unsigned CarryManager::localBasePack() {
    255     return (mCurrentFrameIndex + mCarryInfo->shortAdvance.frameOffset) / mITEMS_PER_PACK;
    256 }
    257    
    258 unsigned CarryManager::scopeBasePack() {
    259     return mCurrentFrameIndex / mITEMS_PER_PACK;
    260 }
    261    
    262 
    263 
    264 unsigned CarryManager::summaryPosition() {
    265     return absPosition(mCarryInfo->summary.frameOffset, 0);
    266 }
    267 
    268 
    269 unsigned CarryManager::summaryPackIndex() {
    270     return summaryPosition()/mITEMS_PER_PACK;
    271 }
    272 
    273 unsigned CarryManager::summaryBits() {
    274     if (mCarryInfo->scopeCarryDataSize > mITEMS_PER_PACK) return mPACK_SIZE;
    275     else return mCarryInfo->scopeCarryDataSize;
    276 }
    277 
    278 
    279 
    280 Value * CarryManager::getCarryPack(unsigned packIndex) {
    281     if (mCarryInPack[packIndex] == nullptr) {
    282         Value * packPtr = iBuilder->CreateGEP(mCarryPackBasePtr, iBuilder->getInt64(packIndex));
    283         // Save the computed pointer - so that it can be used in storeCarryPack.
    284         mCarryPackPtr[packIndex] = packPtr;
    285         mCarryInPack[packIndex] = iBuilder->CreateAlignedLoad(packPtr, mPACK_SIZE/8);
    286     }
    287     return mCarryInPack[packIndex];
    288 }
    289    
    290 void CarryManager::storeCarryPack(unsigned packIndex) {
    291     iBuilder->CreateAlignedStore(mCarryOutPack[packIndex], mCarryPackPtr[packIndex], mPACK_SIZE/8);
    292 }
    293 
    294    
    295 /* maskSelectBitRange selects the bits of a pack from lo_bit through
    296    lo_bit + bitCount - 1, setting all other bits to zero.  */
    297    
    298 Value * CarryManager::maskSelectBitRange(Value * pack, unsigned lo_bit, unsigned bitCount) {
    299     if (bitCount == mPACK_SIZE) {
    300         assert(lo_bit == 0);
    301         return pack;
    302     }
    303     uint64_t mask = ((((uint64_t) 1) << bitCount) - 1) << lo_bit;
    304     return iBuilder->CreateAnd(pack, ConstantInt::get(mCarryPackType, mask));
    305 }
    306    
    307 Value * CarryManager::getCarryInBits(unsigned carryBitPos, unsigned carryBitCount) {
    308     unsigned packIndex = carryBitPos / mPACK_SIZE;
    309     unsigned packOffset = carryBitPos % mPACK_SIZE;
    310     Value * selected = maskSelectBitRange(getCarryPack(packIndex), packOffset, carryBitCount);
    311     if (packOffset == 0) return selected;
    312     return iBuilder->CreateLShr(selected, packOffset);
    313 }
    314 
    315 void CarryManager::extractAndSaveCarryOutBits(Value * bitblock, unsigned carryBit_pos, unsigned carryBitCount) {
    316     unsigned packIndex = carryBit_pos / mPACK_SIZE;
    317     unsigned packOffset = carryBit_pos % mPACK_SIZE;
    318     unsigned rshift = mPACK_SIZE - packOffset - carryBitCount;
    319     uint64_t mask = ((((uint64_t) 1) << carryBitCount) - 1)  << packOffset;
    320     //std::cerr << "extractAndSaveCarryOutBits: packIndex =" << packIndex << ", packOffset = " << packOffset << ", mask = " << mask << std::endl;
    321     Value * field = iBuilder->mvmd_extract(mPACK_SIZE, bitblock, mBITBLOCK_WIDTH/mPACK_SIZE - 1);
    322     //Value * field = maskSelectBitRange(field, PACK_SIZE - carryBitCount, carryBitCount);
    323     if (rshift != 0) {
    324         field = iBuilder->CreateLShr(field, iBuilder->getInt64(rshift));
    325     }
    326     if (packOffset != 0) {
    327         field = iBuilder->CreateAnd(field, iBuilder->getInt64(mask));
    328     }
    329     if (mCarryOutPack[packIndex] == nullptr) {
    330         mCarryOutPack[packIndex] = field;
    331     }
    332     else {
    333         mCarryOutPack[packIndex] = iBuilder->CreateOr(mCarryOutPack[packIndex], field);
    334     }
    335 }
    336 
    337 Value * CarryManager::pack2bitblock(Value * pack) {
    338     return iBuilder->bitCast(iBuilder->CreateZExt(pack, iBuilder->getIntNTy(mBITBLOCK_WIDTH)));
    339 }
    340    
    341    
    342 /* Methods for getting and setting individual carry values. */
    343    
    344 Value * CarryManager::getCarryOpCarryIn(int localIndex) {
    345     unsigned posn = carryOpPosition(localIndex);
    346     if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
    347         return pack2bitblock(getCarryInBits(posn, 1));
    348     }
    349     else {
    350         return getCarryPack(posn);
    351     }
    352 }
    353 
    354 #if (BLOCK_SIZE==256)
    355 #define LONGADD 1
    356 #endif
    357 
    358    
    359 void CarryManager::setCarryOpCarryOut(unsigned localIndex, Value * carry_out_strm) {
    360     unsigned posn = carryOpPosition(localIndex);
    361     if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
    362         extractAndSaveCarryOutBits(carry_out_strm, posn, 1);
    363     }
    364     else {
    365 #ifndef LONGADD
    366         Value * carry_bit = iBuilder->CreateLShr(iBuilder->CreateBitCast(carry_out_strm, iBuilder->getIntNTy(mBITBLOCK_WIDTH)), mBITBLOCK_WIDTH-1);
    367         mCarryOutPack[posn] = iBuilder->CreateBitCast(carry_bit, mBitBlockType);
    368 #else
    369         mCarryOutPack[posn] = carry_out_strm;
    370 #endif
    371         if (mCarryInfo->getWhileDepth() == 0) {
    372             storeCarryPack(posn);
    373         }
    374     }
    375 }
    376 
    377 Value * CarryManager::addCarryInCarryOut(int localIndex, Value* e1, Value* e2) {
    378     if (mBITBLOCK_WIDTH == 128) {
    379         Value * carryq_value = getCarryOpCarryIn(localIndex);
     128    mCurrentScope = mCurrentScope->getParent();
     129    mCarryInfo = mCarryInfoVector[mCurrentScope->getScopeIndex()];
     130    mCarryOutPack[summaryPack()] = summaryCarry;
     131    mCarrySummary.pop_back();
     132}
     133
     134/** ------------------------------------------------------------------------------------------------------------- *
     135 * @brief addCarryInCarryOut
     136 ** ------------------------------------------------------------------------------------------------------------- */
     137Value * CarryManager::addCarryInCarryOut(const unsigned localIndex, Value * const e1, Value * const e2) {
     138    Value * sum = nullptr;
     139    if (mBitBlockWidth == 128) {
     140        Value * carryq_value = getCarryIn(localIndex);
    380141        //calculate carry through logical ops
    381         Value* carrygen = iBuilder->simd_and(e1, e2);
    382         Value* carryprop = iBuilder->simd_or(e1, e2);
    383         Value* digitsum = iBuilder->simd_add(64, e1, e2);
    384         Value* partial = iBuilder->simd_add(64, digitsum, carryq_value);
    385         Value* digitcarry = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, iBuilder->CreateNot(partial)));
    386         Value* mid_carry_in = iBuilder->simd_slli(128, iBuilder->CreateLShr(digitcarry, 63), 64);
    387         Value* sum = iBuilder->simd_add(64, partial, iBuilder->CreateBitCast(mid_carry_in, mBitBlockType));
    388         Value* carry_out_strm = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, iBuilder->CreateNot(sum)));
    389         setCarryOpCarryOut(localIndex, carry_out_strm);
    390         return sum;
    391     }
    392     else {
    393 #ifndef LONGADD
    394         Value * carryq_value = getCarryOpCarryIn(localIndex);
    395         Value* carrygen = iBuilder->simd_and(e1, e2);
    396         Value* carryprop = iBuilder->simd_or(e1, e2);
    397         Value * sum = iBuilder->simd_add(mBITBLOCK_WIDTH, iBuilder->simd_add(mBITBLOCK_WIDTH, e1, e2), carryq_value);
    398         Value* carry_out_strm = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, iBuilder->CreateNot(sum)));
    399         setCarryOpCarryOut(localIndex, carry_out_strm);
    400         return sum;
    401 #else
    402         Value * carryq_value = getCarryOpCarryIn(localIndex);
     142        Value * carrygen = iBuilder->simd_and(e1, e2);
     143        Value * carryprop = iBuilder->simd_or(e1, e2);
     144        Value * digitsum = iBuilder->simd_add(64, e1, e2);
     145        Value * partial = iBuilder->simd_add(64, digitsum, carryq_value);
     146        Value * digitcarry = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, iBuilder->CreateNot(partial)));
     147        Value * mid_carry_in = iBuilder->simd_slli(128, iBuilder->CreateLShr(digitcarry, 63), 64);
     148        sum = iBuilder->simd_add(64, partial, iBuilder->CreateBitCast(mid_carry_in, mBitBlockType));
     149        Value * carry_out_strm = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, iBuilder->CreateNot(sum)));
     150        setCarryOut(localIndex, carry_out_strm);
     151    } else {
     152        #ifndef USING_LONG_ADD
     153        Value * carryq_value = getCarryIn(localIndex);
     154        Value * carrygen = iBuilder->simd_and(e1, e2);
     155        Value * carryprop = iBuilder->simd_or(e1, e2);
     156        sum = iBuilder->simd_add(mBitBlockWidth, iBuilder->simd_add(mBitBlockWidth, e1, e2), carryq_value);
     157        Value * carry_out_strm = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, iBuilder->CreateNot(sum)));
     158        setCarryOut(localIndex, carry_out_strm);
     159        #else
     160        Value * carryq_value = getCarryIn(localIndex);
    403161        Value * carryin = iBuilder->mvmd_extract(32, carryq_value, 0);
    404162        Value * carrygen = iBuilder->simd_and(e1, e2);
     
    412170        Value * incrementMask = iBuilder->CreateXor(iBuilder->CreateAdd(bubbleMask, carryMask2), bubbleMask);
    413171        Value * increments = iBuilder->esimd_bitspread(64,incrementMask);
    414         Value * sum = iBuilder->simd_add(64, digitsum, increments);
    415         //Value * carry_out_strm = iBuilder->mvmd_insert(32, iBuilder->allZeroes(), iBuilder->CreateLShr(incrementMask, iBuilder->getBitBlockWidth()/64), 0);
    416         Value * carry_out_strm = iBuilder->CreateZExt(iBuilder->CreateLShr(incrementMask, iBuilder->getBitBlockWidth()/64), iBuilder->getIntNTy(mBITBLOCK_WIDTH));
    417         setCarryOpCarryOut(localIndex, iBuilder->bitCast(carry_out_strm));
    418         return sum;
    419 #endif
    420     }
    421 }
    422 
    423 
    424 Value * CarryManager::advanceCarryInCarryOut(int localIndex, unsigned shift_amount, Value * strm) {
    425     if (shift_amount == 1) {
    426         return unitAdvanceCarryInCarryOut(localIndex, strm);
    427     }
    428     else if (shift_amount < LongAdvanceBase) {
    429         return shortAdvanceCarryInCarryOut(localIndex, shift_amount, strm);
    430     }
    431     else {
    432         return longAdvanceCarryInCarryOut(localIndex, shift_amount, strm);
    433     }
    434 }
    435 
    436 #define DSSLI_FIELDWIDTH 64
    437 
    438 Value * CarryManager::unitAdvanceCarryInCarryOut(int localIndex, Value * strm) {
    439     unsigned posn = advance1Position(localIndex);
    440     if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
    441         extractAndSaveCarryOutBits(strm, posn, 1);
    442         Value* carry_longint = iBuilder->CreateZExt(getCarryInBits(posn, 1), iBuilder->getIntNTy(mBITBLOCK_WIDTH));
    443         Value* strm_longint = iBuilder->CreateBitCast(strm, iBuilder->getIntNTy(mBITBLOCK_WIDTH));
    444         Value* adv_longint = iBuilder->CreateOr(iBuilder->CreateShl(strm_longint, 1), carry_longint);
    445         Value* result_value = iBuilder->CreateBitCast(adv_longint, mBitBlockType);
    446         return result_value;
    447     }
    448     mCarryOutPack[posn] = strm;
    449     Value * carry_in = getCarryPack(posn);
     172        sum = iBuilder->simd_add(64, digitsum, increments);
     173        Value * carry_out_strm = iBuilder->CreateZExt(iBuilder->CreateLShr(incrementMask, mBitBlockWidth / 64), iBuilder->getIntNTy(mBitBlockWidth));
     174        setCarryOut(localIndex, iBuilder->bitCast(carry_out_strm));
     175        #endif
     176    }
     177    return sum;
     178}
     179
     180/** ------------------------------------------------------------------------------------------------------------- *
     181 * @brief advanceCarryInCarryOut
     182 ** ------------------------------------------------------------------------------------------------------------- */
     183Value * CarryManager::advanceCarryInCarryOut(const unsigned localIndex, const unsigned shiftAmount, Value * const value) {
     184    if (LLVM_LIKELY(shiftAmount == 1)) {
     185        return shortAdvanceCarryInCarryOut(unitAdvancePosition(localIndex), shiftAmount, value);
     186    } else if (shiftAmount < LongAdvanceBase) {
     187        return shortAdvanceCarryInCarryOut(shortAdvancePosition(localIndex), shiftAmount, value);
     188    } else {
     189        return longAdvanceCarryInCarryOut(longAdvancePosition(localIndex), shiftAmount, value);
     190    }
     191}
     192
     193/** ------------------------------------------------------------------------------------------------------------- *
     194 * @brief shortAdvanceCarryInCarryOut
     195 ** ------------------------------------------------------------------------------------------------------------- */
     196Value * CarryManager::shortAdvanceCarryInCarryOut(const unsigned index, const unsigned shiftAmount, Value * const value) {
     197    Value * result = nullptr;
     198    Value * const carryIn = getCarryPack(index);
     199    mCarryOutPack[index] = value;
    450200    if (mCarryInfo->getWhileDepth() == 0) {
    451         storeCarryPack(posn);
    452     }
    453     Value * ahead = iBuilder->mvmd_dslli(DSSLI_FIELDWIDTH, strm, carry_in, iBuilder->getBitBlockWidth()/DSSLI_FIELDWIDTH -1);
    454     return iBuilder->simd_or(iBuilder->simd_srli(DSSLI_FIELDWIDTH, ahead, DSSLI_FIELDWIDTH-1), iBuilder->simd_slli(DSSLI_FIELDWIDTH, strm, 1));
    455 }
    456 
    457 Value * CarryManager::shortAdvanceCarryInCarryOut(int localIndex, unsigned shift_amount, Value * strm) {
    458     unsigned posn = shortAdvancePosition(localIndex);
    459     if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
    460         extractAndSaveCarryOutBits(strm, posn, shift_amount);
    461         //std::cerr << "shortAdvanceCarryInCarryOut: posn = " << posn << ", shift_amount = " << shift_amount << std::endl;
    462         Value* carry_longint = iBuilder->CreateZExt(getCarryInBits(posn, shift_amount), iBuilder->getIntNTy(mBITBLOCK_WIDTH));
    463         Value* strm_longint = iBuilder->CreateBitCast(strm, iBuilder->getIntNTy(mBITBLOCK_WIDTH));
    464         Value* adv_longint = iBuilder->CreateOr(iBuilder->CreateShl(strm_longint, shift_amount), carry_longint);
    465         Value* result_value = iBuilder->CreateBitCast(adv_longint, mBitBlockType);
    466         return result_value;
    467     }
    468     mCarryOutPack[posn] = strm;
    469     Value * carry_in = getCarryPack(posn);
    470     if (mCarryInfo->getWhileDepth() == 0) {
    471         storeCarryPack(posn);
    472     }
    473     // Use a single whole-byte shift, if possible.
    474     if (shift_amount % 8 == 0) {
    475         return iBuilder->mvmd_dslli(8, strm, carry_in, iBuilder->getBitBlockWidth()/8 - shift_amount/8);
    476     }
    477     else if (shift_amount < DSSLI_FIELDWIDTH) {
    478         Value * ahead = iBuilder->mvmd_dslli(DSSLI_FIELDWIDTH, strm, carry_in, iBuilder->getBitBlockWidth()/DSSLI_FIELDWIDTH - 1);
    479         return iBuilder->simd_or(iBuilder->simd_srli(DSSLI_FIELDWIDTH, ahead, DSSLI_FIELDWIDTH-shift_amount), iBuilder->simd_slli(DSSLI_FIELDWIDTH, strm, shift_amount));
    480     }
    481     Value* advanceq_longint = iBuilder->CreateBitCast(carry_in, iBuilder->getIntNTy(mBITBLOCK_WIDTH));
    482     Value* strm_longint = iBuilder->CreateBitCast(strm, iBuilder->getIntNTy(mBITBLOCK_WIDTH));
    483     Value* adv_longint = iBuilder->CreateOr(iBuilder->CreateShl(strm_longint, shift_amount), iBuilder->CreateLShr(advanceq_longint, mBITBLOCK_WIDTH - shift_amount), "advance");
    484     return iBuilder->CreateBitCast(adv_longint, mBitBlockType);
    485 }
    486    
    487 
    488 /*  currently defined in carry_data.h
    489  
    490  static unsigned power2ceil (unsigned v) {
    491  unsigned ceil = 1;
    492  while (ceil < v) ceil *= 2;
    493  return ceil;
    494  }
    495  
    496  unsigned longAdvanceEntries(unsigned shift_amount) const {
    497  return (shift_amount + mBITBLOCK_WIDTH - 1)/mBITBLOCK_WIDTH;
    498  }
    499  
    500  unsigned longAdvanceBufferSize(unsigned shift_amount)  const {
    501  return power2ceil(longAdvanceEntries(shift_amount));
    502  }
    503  */
    504 
    505    
    506 Value * CarryManager::longAdvanceCarryInCarryOut(int localIndex, unsigned shift_amount, Value * carry_out) {
    507     unsigned carryDataIndex = longAdvanceBitBlockPosition(localIndex);
    508     Value * advBaseIndex = iBuilder->getInt64(carryDataIndex);
    509     if (shift_amount <= mBITBLOCK_WIDTH) {
     201        storeCarryOut(index);
     202    }
     203    if (LLVM_LIKELY(shiftAmount == 1)) {
     204        Value * ahead = iBuilder->mvmd_dslli(DSSLI_FIELDWIDTH, value, carryIn, iBuilder->getBitBlockWidth()/DSSLI_FIELDWIDTH -1);
     205        result = iBuilder->simd_or(iBuilder->simd_srli(DSSLI_FIELDWIDTH, ahead, DSSLI_FIELDWIDTH-1), iBuilder->simd_slli(DSSLI_FIELDWIDTH, value, 1));
     206    } else if (shiftAmount % 8 == 0) { // Use a single whole-byte shift, if possible.
     207        result = iBuilder->mvmd_dslli(8, value, carryIn, (iBuilder->getBitBlockWidth() / 8) - (shiftAmount / 8));
     208    } else if (shiftAmount < DSSLI_FIELDWIDTH) {
     209        Value * ahead = iBuilder->mvmd_dslli(DSSLI_FIELDWIDTH, value, carryIn, iBuilder->getBitBlockWidth()/DSSLI_FIELDWIDTH - 1);
     210        result = iBuilder->simd_or(iBuilder->simd_srli(DSSLI_FIELDWIDTH, ahead, DSSLI_FIELDWIDTH-shiftAmount), iBuilder->simd_slli(DSSLI_FIELDWIDTH, value, shiftAmount));
     211    } else {
     212        Value* advanceq_longint = iBuilder->CreateBitCast(carryIn, iBuilder->getIntNTy(mBitBlockWidth));
     213        Value* strm_longint = iBuilder->CreateBitCast(value, iBuilder->getIntNTy(mBitBlockWidth));
     214        Value* adv_longint = iBuilder->CreateOr(iBuilder->CreateShl(strm_longint, shiftAmount), iBuilder->CreateLShr(advanceq_longint, mBitBlockWidth - shiftAmount), "advance");
     215        result = iBuilder->CreateBitCast(adv_longint, mBitBlockType);
     216    }
     217    if (LLVM_LIKELY(hasSummary())) {
     218        addToSummary(value);
     219    }
     220    return result;
     221}
     222
     223/** ------------------------------------------------------------------------------------------------------------- *
     224 * @brief longAdvanceCarryInCarryOut
     225 ** ------------------------------------------------------------------------------------------------------------- */
     226Value * CarryManager::longAdvanceCarryInCarryOut(const unsigned index, const unsigned shiftAmount, Value * const value) {
     227    Value * advBaseIndex = iBuilder->getInt64(index);
     228    if (shiftAmount <= mBitBlockWidth) {
    510229        // special case using a single buffer entry and the carry_out value.
    511230        Value * advanceDataPtr = iBuilder->CreateGEP(mCarryBitBlockPtr, advBaseIndex);
    512         Value * carry_block0 = iBuilder->CreateAlignedLoad(advanceDataPtr, mBITBLOCK_WIDTH/8);
    513         iBuilder->CreateAlignedStore(carry_out, advanceDataPtr, mBITBLOCK_WIDTH/8);
     231        Value * carry_block0 = iBuilder->CreateAlignedLoad(advanceDataPtr, mBitBlockWidth/8);
     232        iBuilder->CreateAlignedStore(value, advanceDataPtr, mBitBlockWidth/8);
    514233        /* Very special case - no combine */
    515         if (shift_amount == mBITBLOCK_WIDTH) return carry_block0;
    516         Value* block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carry_block0, iBuilder->getIntNTy(mBITBLOCK_WIDTH)), mBITBLOCK_WIDTH - shift_amount);
    517         Value* block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(carry_out, iBuilder->getIntNTy(mBITBLOCK_WIDTH)), shift_amount);
     234        if (shiftAmount == mBitBlockWidth) {
     235            return carry_block0;
     236        }
     237        Value* block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carry_block0, iBuilder->getIntNTy(mBitBlockWidth)), mBitBlockWidth - shiftAmount);
     238        Value* block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(value, iBuilder->getIntNTy(mBitBlockWidth)), shiftAmount);
    518239        return iBuilder->CreateBitCast(iBuilder->CreateOr(block1_shl, block0_shr), mBitBlockType);
    519240    }
    520241    // We need a buffer of at least two elements for storing the advance data.
    521     const unsigned block_shift = shift_amount % mBITBLOCK_WIDTH;
    522     const unsigned advanceEntries = mCarryInfo->longAdvanceEntries(shift_amount);
    523     const unsigned bufsize = mCarryInfo->longAdvanceBufferSize(shift_amount);
     242    const unsigned block_shift = shiftAmount % mBitBlockWidth;
     243    const unsigned advanceEntries = mCarryInfo->longAdvanceEntries(shiftAmount);
     244    const unsigned bufsize = mCarryInfo->longAdvanceBufferSize(shiftAmount);
    524245    Value * indexMask = iBuilder->getInt64(bufsize - 1);  // A mask to implement circular buffer indexing
    525246    Value * loadIndex0 = iBuilder->CreateAdd(iBuilder->CreateAnd(iBuilder->CreateSub(mBlockNo, iBuilder->getInt64(advanceEntries)), indexMask), advBaseIndex);
    526247    Value * storeIndex = iBuilder->CreateAdd(iBuilder->CreateAnd(mBlockNo, indexMask), advBaseIndex);
    527     Value * carry_block0 = iBuilder->CreateAlignedLoad(iBuilder->CreateGEP(mCarryBitBlockPtr, loadIndex0), mBITBLOCK_WIDTH/8);
     248    Value * carry_block0 = iBuilder->CreateAlignedLoad(iBuilder->CreateGEP(mCarryBitBlockPtr, loadIndex0), mBitBlockWidth/8);
    528249    // If the long advance is an exact multiple of mBITBLOCK_WIDTH, we simply return the oldest
    529250    // block in the long advance carry data area. 
    530251    if (block_shift == 0) {
    531         iBuilder->CreateAlignedStore(carry_out, iBuilder->CreateGEP(mCarryBitBlockPtr, storeIndex), mBITBLOCK_WIDTH/8);
     252        iBuilder->CreateAlignedStore(value, iBuilder->CreateGEP(mCarryBitBlockPtr, storeIndex), mBitBlockWidth/8);
    532253        return carry_block0;
    533254    }
    534255    // Otherwise we need to combine data from the two oldest blocks.
    535256    Value * loadIndex1 = iBuilder->CreateAdd(iBuilder->CreateAnd(iBuilder->CreateSub(mBlockNo, iBuilder->getInt64(advanceEntries-1)), indexMask), advBaseIndex);
    536     Value * carry_block1 = iBuilder->CreateAlignedLoad(iBuilder->CreateGEP(mCarryBitBlockPtr, loadIndex1), mBITBLOCK_WIDTH/8);
    537     Value* block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carry_block0, iBuilder->getIntNTy(mBITBLOCK_WIDTH)), mBITBLOCK_WIDTH - block_shift);
    538     Value* block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(carry_block1, iBuilder->getIntNTy(mBITBLOCK_WIDTH)), block_shift);
    539     iBuilder->CreateAlignedStore(carry_out, iBuilder->CreateGEP(mCarryBitBlockPtr, storeIndex), mBITBLOCK_WIDTH/8);
     257    Value * carry_block1 = iBuilder->CreateAlignedLoad(iBuilder->CreateGEP(mCarryBitBlockPtr, loadIndex1), mBitBlockWidth/8);
     258    Value* block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carry_block0, iBuilder->getIntNTy(mBitBlockWidth)), mBitBlockWidth - block_shift);
     259    Value* block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(carry_block1, iBuilder->getIntNTy(mBitBlockWidth)), block_shift);
     260    iBuilder->CreateAlignedStore(value, iBuilder->CreateGEP(mCarryBitBlockPtr, storeIndex), mBitBlockWidth/8);
    540261    return iBuilder->CreateBitCast(iBuilder->CreateOr(block1_shl, block0_shr), mBitBlockType);
    541262}
    542    
    543 
    544 /* Methods for getting and setting carry summary values */
    545    
    546 bool CarryManager::blockHasCarries(){
    547     return mCarryInfo->blockHasCarries();
    548 }
    549 
    550 
    551 Value * CarryManager::generateBitBlockOrSummaryTest(Value * bitblock) {
    552     Value * test_expr = bitblock;
    553     if (mCarryInfo->blockHasCarries()) {
    554         Value * summary_pack = getCarryPack(summaryPackIndex());
    555         if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
    556             Value * summary_bits = maskSelectBitRange(summary_pack, summaryPosition() % mPACK_SIZE, summaryBits());
    557             test_expr = iBuilder->simd_or(test_expr, iBuilder->CreateZExt(summary_bits, iBuilder->getIntNTy(mBITBLOCK_WIDTH)));
    558         }
    559         else {
    560             test_expr = iBuilder->simd_or(test_expr, summary_pack);
    561         }
    562     }
    563     return iBuilder->bitblock_any(test_expr);
    564 }
    565 
    566 void CarryManager::initializeCarryDataAtIfEntry() {
    567     if (blockHasCarries()) {
    568         if (mCarryOutPack[scopeBasePack()] == nullptr) {
    569             mCarryInfo->ifEntryPack = Constant::getNullValue(mCarryPackType);
    570         }
    571         else {
    572             mCarryInfo->ifEntryPack = mCarryOutPack[scopeBasePack()];
    573         }
    574     }
    575 }
    576    
    577 void CarryManager::buildCarryDataPhisAfterIfBody(BasicBlock * ifEntryBlock, BasicBlock * ifBodyFinalBlock) {
     263
     264/** ------------------------------------------------------------------------------------------------------------- *
     265 * @brief generateSummaryTest
     266 ** ------------------------------------------------------------------------------------------------------------- */
     267Value * CarryManager::generateSummaryTest(Value * condition) {
     268    if (mCarryInfo->hasCarries()) {
     269        Value * summary_pack = getCarryPack(summaryPack());
     270        condition = iBuilder->simd_or(condition, summary_pack);
     271    }
     272    return iBuilder->bitblock_any(condition);
     273}
     274
     275/** ------------------------------------------------------------------------------------------------------------- *
     276 * @brief storeCarryOutSummary
     277 ** ------------------------------------------------------------------------------------------------------------- */
     278void CarryManager::storeCarryOutSummary() {
     279    if (LLVM_LIKELY(mCarryInfo->explicitSummaryRequired())) {
     280        const unsigned carrySummaryIndex = summaryPack();
     281        if (LLVM_UNLIKELY(mCarryInfo->hasLongAdvances())) { // Force if entry
     282            mCarryOutPack[carrySummaryIndex] = Constant::getAllOnesValue(mCarryPackType);
     283        }
     284        storeCarryOut(carrySummaryIndex);
     285    }
     286}
     287
     288/** ------------------------------------------------------------------------------------------------------------- *
     289 * @brief popCount
     290 ** ------------------------------------------------------------------------------------------------------------- */
     291Value * CarryManager::popCount(Value * to_count, unsigned globalIdx) {
     292    Value * countPtr = iBuilder->CreateGEP(mPopcountBasePtr, iBuilder->getInt64(globalIdx));
     293    Value * countSoFar = iBuilder->CreateAlignedLoad(countPtr, 8);
     294    Value * fieldCounts = iBuilder->simd_popcount(64, to_count);
     295    for (int i = 0; i < mBitBlockWidth/64; i++) {
     296        countSoFar = iBuilder->CreateAdd(countSoFar, iBuilder->mvmd_extract(64, fieldCounts, i));
     297    }
     298    iBuilder->CreateAlignedStore(countSoFar, countPtr, 8);
     299    return iBuilder->bitCast(iBuilder->CreateZExt(countSoFar, iBuilder->getIntNTy(mBitBlockWidth)));
     300}
     301
     302/** ------------------------------------------------------------------------------------------------------------- *
     303 * @brief blendCarrySummaryWithOuterSummary
     304 ** ------------------------------------------------------------------------------------------------------------- */
     305void CarryManager::blendCarrySummaryWithOuterSummary() {
     306    if (LLVM_LIKELY(mCarrySummary.size() > 0)) {
     307        addToSummary(mCarrySummary.back());
     308    }
     309}
     310
     311/** ------------------------------------------------------------------------------------------------------------- *
     312 * @brief buildCarryDataPhisAfterIfBody
     313 ** ------------------------------------------------------------------------------------------------------------- */
     314void CarryManager::buildCarryDataPhisAfterIfBody(BasicBlock * const entry, BasicBlock * const end) {
    578315    if (mCarryInfo->getWhileDepth() > 0) {
    579316        // We need to phi out everything for the while carry accumulation process.
     317        const unsigned scopeBaseOffset = scopeBasePack();
    580318        const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
    581         const unsigned currentScopeBase = scopeBasePack();
    582         for (unsigned index = currentScopeBase; index < currentScopeBase + scopeCarryPacks; ++index) {
    583             PHINode * phi_out = iBuilder->CreatePHI(mCarryPackType, 2);
    584             phi_out->addIncoming(Constant::getNullValue(mCarryPackType),ifEntryBlock);
    585             phi_out->addIncoming(mCarryOutPack[index], ifBodyFinalBlock);
    586             mCarryOutPack[index] = phi_out;
    587         }
    588         return;
    589     }
    590     unsigned const ifScopeCarrySize = mCarryInfo->scopeCarryDataSize;
    591     if (ifScopeCarrySize == 0) {
    592         // No carry data, therefore no phi nodes.
    593         return;
    594     }
    595     if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
    596         if (ifScopeCarrySize <= mPACK_SIZE) {
    597             unsigned const ifPackIndex = scopeBasePack();
    598             PHINode * ifPack_phi = iBuilder->CreatePHI(mCarryPackType, 2, "ifPack");
    599             ifPack_phi->addIncoming(mCarryInfo->ifEntryPack, ifEntryBlock);
    600             ifPack_phi->addIncoming(mCarryOutPack[ifPackIndex], ifBodyFinalBlock);
    601             mCarryOutPack[ifPackIndex] = ifPack_phi;
    602             return;
    603         }
    604     }
    605     if (mCarryInfo->getIfDepth() > 1) {
    606         // Our parent block is also an if.  It needs access to our summary to compute
    607         // its own summary.
    608         const unsigned summaryIndex = summaryPackIndex();
    609         PHINode * summary_phi = iBuilder->CreatePHI(mCarryPackType, 2, "summary");
    610         summary_phi->addIncoming(Constant::getNullValue(mCarryPackType), ifEntryBlock);
    611         summary_phi->addIncoming(mCarryOutPack[summaryIndex], ifBodyFinalBlock);
    612         mCarryOutPack[summaryIndex] = summary_phi;
    613     }
    614 }
    615    
    616 void CarryManager::addSummaryPhiIfNeeded(BasicBlock * ifEntryBlock, BasicBlock * ifBodyFinalBlock) {
    617     if ((mCarryInfo->getIfDepth() <= 1) || !mCarryInfo->blockHasCarries()){
    618         // For ifDepth == 1, the parent does not need a summary as it is not itself within an if.
    619         // Therefore, it doesn't need access to this block's summary in building its own.
    620         return;
    621     }
    622     const unsigned carrySummaryIndex = summaryPackIndex();
    623     PHINode * summary_phi = iBuilder->CreatePHI(mCarryPackType, 2, "summary");
    624     summary_phi->addIncoming(Constant::getNullValue(mCarryPackType), ifEntryBlock);
    625     summary_phi->addIncoming(mCarryOutPack[carrySummaryIndex], ifBodyFinalBlock);
    626     mCarryOutPack[carrySummaryIndex] = summary_phi;
    627 }
    628    
    629 void CarryManager::generateCarryOutSummaryCodeIfNeeded() {
    630    
    631     if (!mCarryInfo->explicitSummaryRequired()) {
    632         // An explicit summary may not be required, if there is a single carry
    633         // operation within the block, or the carries are packed and all carry
    634         // bits fit within a single pack.
    635         return;
    636     }
    637    
    638     const unsigned carrySummaryIndex = summaryPackIndex();
    639    
    640     Value * carry_summary = nullptr;
    641     if (mCarryInfo->blockHasLongAdvances()) { // Force if entry
    642         carry_summary = Constant::getAllOnesValue(mCarryPackType);
    643     }
    644     else {
    645         carry_summary = Constant::getNullValue(mCarryPackType);
    646         unsigned localCarryIndex = localBasePack();
    647         unsigned localCarryPacks = mCarryInfo->getLocalCarryPackCount();
    648         if (localCarryPacks > 0) {
    649             carry_summary = mCarryOutPack[localCarryIndex];
    650             for (unsigned i = 1; i < localCarryPacks; i++) {
    651                 carry_summary = iBuilder->CreateOr(carry_summary, mCarryOutPack[localCarryIndex + i]);
    652             }
    653         }
    654 
    655 
    656         // iBuilder->SetInsertPoint(&(iBuilder->GetInsertBlock()->back()));
    657 
    658         for (Statement * stmt : *mCurrentScope) {
    659             if (If * innerIf = dyn_cast<If>(stmt)) {
    660                 PabloBlock * inner_blk = innerIf->getBody();
    661                 enterScope(inner_blk);
    662                 if (blockHasCarries()) {                   
    663                     carry_summary = iBuilder->CreateOr(carry_summary, mCarryOutPack[summaryPackIndex()]);
    664                 }
    665                 leaveScope();
    666             }
    667             else if (While * innerWhile = dyn_cast<While>(stmt)) {
    668                 PabloBlock * inner_blk = innerWhile->getBody();
    669                 enterScope(inner_blk);
    670                 if (blockHasCarries()) {
    671                     carry_summary = iBuilder->CreateOr(carry_summary, mCarryOutPack[summaryPackIndex()]);
    672                 }
    673                 leaveScope();
    674             }
    675         }
    676     }
    677     // Calculation of the carry out summary is complete.   Store it and make it
    678     // available in case it must included by parent blocks.
    679     mCarryOutPack[carrySummaryIndex] = carry_summary;
    680     storeCarryPack(carrySummaryIndex);
    681 }
    682 
     319        for (unsigned i = scopeBaseOffset; i < scopeBaseOffset + scopeCarryPacks; ++i) {
     320            Type * const type = mCarryOutPack[i]->getType();
     321            PHINode * phi = iBuilder->CreatePHI(type, 2);
     322            phi->addIncoming(Constant::getNullValue(type), entry);
     323            phi->addIncoming(mCarryOutPack[i], end);
     324            mCarryOutPack[i] = phi;
     325        }
     326    }
     327    if (LLVM_LIKELY(mCarrySummary.size() > 0)) {
     328        const unsigned summaryIndex = summaryPack();
     329        Value * carrySummary = mCarryOutPack[summaryIndex];
     330        if (mCarrySummary.back() != carrySummary) {
     331            Value * outerCarrySummary = mCarrySummary.back();
     332            Value * nestedCarrySummary = mCarryOutPack[summaryIndex];
     333            assert (outerCarrySummary->getType() == nestedCarrySummary->getType());
     334            PHINode * const phi = iBuilder->CreatePHI(outerCarrySummary->getType(), 2, "summary");
     335            phi->addIncoming(outerCarrySummary, entry);
     336            phi->addIncoming(nestedCarrySummary, end);
     337            mCarryOutPack[summaryIndex] = phi;
     338        }
     339    }
     340}
     341
     342/** ------------------------------------------------------------------------------------------------------------- *
     343 * @brief initializeWhileEntryCarryDataPhis
     344 ** ------------------------------------------------------------------------------------------------------------- */
     345void CarryManager::initializeWhileEntryCarryDataPhis(BasicBlock * const end) {
     346    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
     347    mCarryOutAccumPhis.resize(scopeCarryPacks);
     348    #ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
     349    const unsigned currentScopeBase = scopeBasePack();
     350    mCarryInPhis.resize(scopeCarryPacks);
     351    #endif
     352    for (unsigned index = 0; index < scopeCarryPacks; ++index) {
     353        #ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
     354        PHINode * phi_in = iBuilder->CreatePHI(mCarryPackType, 2);
     355        phi_in->addIncoming(mCarryInPack[currentScopeBase+index], whileEntryBlock);
     356        mCarryInPhis[index] = phi_in;
     357        #endif
     358        PHINode * phi_out = iBuilder->CreatePHI(mCarryPackType, 2);
     359        phi_out->addIncoming(Constant::getNullValue(mCarryPackType), end);
     360        mCarryOutAccumPhis[index] = phi_out;
     361    }
     362}
     363
     364/** ------------------------------------------------------------------------------------------------------------- *
     365 * @brief finalizeWhileBlockCarryDataPhis
     366 ** ------------------------------------------------------------------------------------------------------------- */
     367void CarryManager::finalizeWhileBlockCarryDataPhis(BasicBlock * const end) {
     368    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
     369    const unsigned currentScopeBase = scopeBasePack();
     370    for (unsigned index = 0; index < scopeCarryPacks; ++index) {
     371        #ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
     372        mCarryInPhis[index]->addIncoming(Constant::getNullValue(mCarryPackType), whileBodyFinalBlock);
     373        #endif
     374        PHINode * phi = mCarryOutAccumPhis[index];
     375        Value * carryOut = iBuilder->CreateOr(phi, mCarryOutPack[currentScopeBase + index]);
     376        phi->addIncoming(carryOut, end);
     377        mCarryOutPack[currentScopeBase + index] = carryOut;
     378    }
     379}
     380
     381/** ------------------------------------------------------------------------------------------------------------- *
     382 * @brief ensureCarriesLoadedRecursive
     383 ** ------------------------------------------------------------------------------------------------------------- */
    683384void CarryManager::ensureCarriesLoadedRecursive() {
    684385    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
     
    691392}
    692393
    693 
    694 void CarryManager::initializeCarryDataPhisAtWhileEntry(BasicBlock * whileEntryBlock) {
    695     const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
    696     mCarryOutAccumPhis.resize(scopeCarryPacks);
    697 #ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
    698     const unsigned currentScopeBase = scopeBasePack();
    699     mCarryInPhis.resize(scopeCarryPacks);
    700 #endif
    701     for (unsigned index = 0; index < scopeCarryPacks; ++index) {
    702 #ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
    703         PHINode * phi_in = iBuilder->CreatePHI(mCarryPackType, 2);
    704         phi_in->addIncoming(mCarryInPack[currentScopeBase+index], whileEntryBlock);
    705         mCarryInPhis[index] = phi_in;
    706 #endif
    707         PHINode * phi_out = iBuilder->CreatePHI(mCarryPackType, 2);
    708         phi_out->addIncoming(Constant::getNullValue(mCarryPackType), whileEntryBlock);
    709         mCarryOutAccumPhis[index] = phi_out;
    710     }
    711 }
    712 
    713 
    714 void CarryManager::extendCarryDataPhisAtWhileBodyFinalBlock(BasicBlock * whileBodyFinalBlock) {
    715     const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
    716     const unsigned currentScopeBase = scopeBasePack();
    717     for (unsigned index = 0; index < scopeCarryPacks; ++index) {
    718 #ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
    719         mCarryInPhis[index]->addIncoming(Constant::getNullValue(mCarryPackType), whileBodyFinalBlock);
    720 #endif
    721         PHINode * phi = mCarryOutAccumPhis[index];
    722         Value * carryOut = iBuilder->CreateOr(phi, mCarryOutPack[currentScopeBase+index]);
    723         phi->addIncoming(carryOut, whileBodyFinalBlock);
    724         mCarryOutPack[currentScopeBase+index] = carryOut;
    725     }
    726 }
    727 
     394/** ------------------------------------------------------------------------------------------------------------- *
     395 * @brief ensureCarriesStoredRecursive
     396 ** ------------------------------------------------------------------------------------------------------------- */
    728397void CarryManager::ensureCarriesStoredRecursive() {
    729398    const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
     
    731400    if (mCarryInfo->getWhileDepth() == 1) {
    732401        for (auto i = currentScopeBase; i < currentScopeBase + scopeCarryPacks; ++i) {
    733             storeCarryPack(i);
    734         }
    735     }
    736 }
    737 
    738 /* Store all the full carry packs generated locally in this scope or the
    739    single full pack for this scope*/
    740 void CarryManager::ensureCarriesStoredLocal() {
    741     if (mITEMS_PER_PACK > 1) {// #ifdef PACKING
    742         const unsigned scopeCarryPacks = mCarryInfo->getScopeCarryPackCount();
    743         if ((scopeCarryPacks > 0) && ((mCurrentFrameIndex % mPACK_SIZE) == 0)) {
    744             // We have carry data and we are not in the middle of a pack.
    745             // Write out all local packs.
    746             auto localCarryIndex = localBasePack();
    747             auto localCarryPacks = mCarryInfo->getLocalCarryPackCount();
    748             for (auto i = localCarryIndex; i < localCarryIndex + localCarryPacks; i++) {
    749                 storeCarryPack(i);
     402            storeCarryOut(i);
     403        }
     404    }
     405}
     406
     407/** ------------------------------------------------------------------------------------------------------------- *
     408 * @brief getCarryIn
     409 ** ------------------------------------------------------------------------------------------------------------- */
     410Value * CarryManager::getCarryIn(const unsigned localIndex) {
     411    return getCarryPack(addPosition(localIndex));
     412}
     413
     414/** ------------------------------------------------------------------------------------------------------------- *
     415 * @brief setCarryOut
     416 ** ------------------------------------------------------------------------------------------------------------- */
     417void CarryManager::setCarryOut(const unsigned localIndex, Value * carryOut) {
     418    const unsigned index = addPosition(localIndex);
     419    #ifndef USING_LONG_ADD
     420    Value * carry_bit = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryOut, iBuilder->getIntNTy(mBitBlockWidth)), mBitBlockWidth-1);
     421    carryOut = iBuilder->CreateBitCast(carry_bit, mBitBlockType);
     422    #endif
     423    mCarryOutPack[index] = carryOut;
     424    if (LLVM_LIKELY(hasSummary())) {
     425        addToSummary(carryOut);
     426    }
     427    if (mCarryInfo->getWhileDepth() == 0) {
     428        storeCarryOut(index);
     429    }
     430}
     431
     432/** ------------------------------------------------------------------------------------------------------------- *
     433 * @brief enumerate
     434 ** ------------------------------------------------------------------------------------------------------------- */
     435unsigned CarryManager::enumerate(PabloBlock * blk, unsigned ifDepth, unsigned whileDepth) {
     436    unsigned idx = blk->getScopeIndex();
     437    CarryData * cd = new CarryData(blk, mBitBlockWidth, 1);
     438    mCarryInfoVector[idx] = cd;
     439
     440    cd->setIfDepth(ifDepth);
     441    cd->setWhileDepth(whileDepth);
     442    unsigned nestedOffset = cd->nested.frameOffset;
     443
     444    for (Statement * stmt : *blk) {
     445        if (Count * c = dyn_cast<Count>(stmt)) {
     446            c->setGlobalCountIndex(mPabloCountCount);
     447            mPabloCountCount++;
     448        } else if (If * ifStatement = dyn_cast<If>(stmt)) {
     449            const unsigned ifCarryDataBits = enumerate(ifStatement->getBody(), ifDepth + 1, whileDepth);
     450            CarryData * nestedBlockData = mCarryInfoVector[ifStatement->getBody()->getScopeIndex()];
     451            if (1 == mBitBlockWidth) {  // PACKING
     452                if (cd->roomInFinalPack(nestedOffset) < ifCarryDataBits) {
     453                    nestedOffset = alignCeiling(nestedOffset, mBitBlockWidth);
     454                }
    750455            }
    751             if ((localCarryPacks == 0) && (scopeCarryPacks == 1) && (mCarryInfo->nested.entries > 1)) {
    752                 storeCarryPack(localCarryIndex);
     456            nestedBlockData->setFramePosition(nestedOffset);
     457            nestedOffset += ifCarryDataBits;
     458            if (cd->maxNestingDepth <= nestedBlockData->maxNestingDepth) {
     459                cd->maxNestingDepth = nestedBlockData->maxNestingDepth + 1;
    753460            }
    754         }
    755     }
    756 }
    757 
    758 Value * CarryManager::popCount(Value * to_count, unsigned globalIdx) {
    759     Value * countPtr = iBuilder->CreateGEP(mPopcountBasePtr, iBuilder->getInt64(globalIdx));
    760     Value * countSoFar = iBuilder->CreateAlignedLoad(countPtr, 8);
    761     Value * fieldCounts = iBuilder->simd_popcount(64, to_count);
    762     for (int i = 0; i < mBITBLOCK_WIDTH/64; i++) {
    763         countSoFar = iBuilder->CreateAdd(countSoFar, iBuilder->mvmd_extract(64, fieldCounts, i));
    764     }
    765     iBuilder->CreateAlignedStore(countSoFar, countPtr, 8);
    766     return iBuilder->bitCast(iBuilder->CreateZExt(countSoFar, iBuilder->getIntNTy(mBITBLOCK_WIDTH)));
     461            cd->nested.entries++;
     462        } else if (While * whileStatement = dyn_cast<While>(stmt)) {
     463            const unsigned whileCarryDataBits = enumerate(whileStatement->getBody(), ifDepth, whileDepth + 1);
     464            CarryData * const nestedBlockData = mCarryInfoVector[whileStatement->getBody()->getScopeIndex()];
     465            if (1 == mBitBlockWidth) {  // PACKING
     466                if (cd->roomInFinalPack(nestedOffset) < whileCarryDataBits) {
     467                    nestedOffset = alignCeiling(nestedOffset, mBitBlockWidth);
     468                }
     469            }
     470            nestedBlockData->setFramePosition(nestedOffset);
     471            nestedOffset += whileCarryDataBits;
     472            if (cd->maxNestingDepth <= nestedBlockData->maxNestingDepth) {
     473                cd->maxNestingDepth = nestedBlockData->maxNestingDepth + 1;
     474            }
     475            cd->nested.entries++;
     476        }
     477    }
     478
     479    cd->scopeCarryDataSize = nestedOffset;
     480
     481    if (cd->explicitSummaryRequired()) {
     482        // Need extra space for the summary variable, always the last
     483        // entry within an if block.
     484        if (1 == mBitBlockWidth) {  // PACKING
     485            cd->scopeCarryDataSize = alignCeiling(cd->scopeCarryDataSize, mBitBlockWidth);
     486        }
     487        cd->summary.frameOffset = cd->scopeCarryDataSize;
     488        cd->scopeCarryDataSize += 1;  //  computed summary is a full pack.
     489    } else {
     490        cd->summary.frameOffset = 0;
     491    }
     492
     493    return cd->scopeCarryDataSize;
     494}
     495
     496/** ------------------------------------------------------------------------------------------------------------- *
     497 * @brief addToSummary
     498 ** ------------------------------------------------------------------------------------------------------------- */
     499inline Value * CarryManager::addToSummary(Value * const value) {
     500    const unsigned summaryIndex = summaryPack();
     501    Value * summary = mCarryOutPack[summaryIndex];
     502    assert (summary);
     503    assert (value);
     504    if (LLVM_UNLIKELY(isa<Constant>(summary))) {
     505        if (LLVM_LIKELY(cast<Constant>(summary)->isZeroValue())) {
     506            summary = value;
     507            goto return_result;
     508        } else if (cast<Constant>(summary)->isAllOnesValue()) {
     509            goto return_result;
     510        }
     511    }
     512    if (LLVM_UNLIKELY(isa<Constant>(value))) {
     513        if (LLVM_LIKELY(cast<Constant>(value)->isZeroValue())) {
     514            goto return_result;
     515        } else if (cast<Constant>(summary)->isAllOnesValue()) {
     516            summary = value;
     517            goto return_result;
     518        }
     519    }
     520    if (LLVM_LIKELY(summary != value)) {
     521        summary = iBuilder->CreateOr(summary, value, "summary");
     522    }
     523return_result:
     524    mCarryOutPack[summaryIndex] = summary;
     525    return summary;
     526}
     527
     528/** ------------------------------------------------------------------------------------------------------------- *
     529 * @brief getCarryPack
     530 ** ------------------------------------------------------------------------------------------------------------- */
     531Value * CarryManager::getCarryPack(const unsigned packIndex) {
     532    if (mCarryInPack[packIndex] == nullptr) {
     533        Value * const packPtr = iBuilder->CreateGEP(mCarryPackBasePtr, iBuilder->getInt64(packIndex));
     534        mCarryPackPtr[packIndex] = packPtr;
     535        mCarryInPack[packIndex] = iBuilder->CreateAlignedLoad(packPtr, mBitBlockWidth / 8);
     536    }
     537    return mCarryInPack[packIndex];
     538}
     539
     540/** ------------------------------------------------------------------------------------------------------------- *
     541 * @brief storeCarryOut
     542 ** ------------------------------------------------------------------------------------------------------------- */
     543void CarryManager::storeCarryOut(const unsigned packIndex) {
     544    assert (mCarryOutPack[packIndex]);
     545    assert (mCarryPackPtr[packIndex]);
     546    iBuilder->CreateAlignedStore(mCarryOutPack[packIndex], mCarryPackPtr[packIndex], mBitBlockWidth / 8);
     547}
     548
     549/** ------------------------------------------------------------------------------------------------------------- *
     550 * @brief generateBlockNoIncrement
     551 ** ------------------------------------------------------------------------------------------------------------- */
     552void CarryManager::generateBlockNoIncrement() {
     553    iBuilder->CreateStore(iBuilder->CreateAdd(mBlockNo, iBuilder->getInt64(1)), mBlockNoPtr);
     554}
     555
     556/* Helper routines */
     557
     558inline unsigned CarryManager::relativeFrameOffset(const unsigned frameOffset, const unsigned index) const {
     559    return mCurrentFrameIndex + frameOffset + index;
     560}
     561
     562inline unsigned CarryManager::addPosition(const unsigned localIndex) const {
     563    return relativeFrameOffset(mCarryInfo->addWithCarry.frameOffset, localIndex);
     564}
     565
     566inline unsigned CarryManager::unitAdvancePosition(const unsigned localIndex) const {
     567    return relativeFrameOffset(mCarryInfo->unitAdvance.frameOffset, localIndex);
     568}
     569
     570inline unsigned CarryManager::shortAdvancePosition(const unsigned localIndex) const {
     571    return relativeFrameOffset(mCarryInfo->shortAdvance.frameOffset, localIndex);
     572}
     573
     574inline unsigned CarryManager::longAdvancePosition(const unsigned localIndex) const {
     575    return (mCurrentFrameIndex + mCarryInfo->longAdvance.frameOffset) + localIndex;
     576}
     577
     578inline unsigned CarryManager::localBasePack() const {
     579    return (mCurrentFrameIndex + mCarryInfo->shortAdvance.frameOffset);
     580}
     581
     582inline unsigned CarryManager::scopeBasePack() const {
     583    return mCurrentFrameIndex;
     584}
     585
     586inline unsigned CarryManager::summaryPack() const {
     587    return relativeFrameOffset(mCarryInfo->summary.frameOffset, 0);
     588}
     589
     590inline bool CarryManager::hasSummary() const {
     591    return mCarryInfo->explicitSummaryRequired() && !(mCarryInfo->hasLongAdvances());
    767592}
    768593
Note: See TracChangeset for help on using the changeset viewer.