Changeset 5712 for icGREP


Ignore:
Timestamp:
Oct 27, 2017, 11:17:29 AM (16 months ago)
Author:
cameron
Message:

Fixes for indexed advance

Location:
icGREP/icgrep-devel/icgrep/pablo
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pablo/builder.cpp

    r5709 r5712  
    141141    }
    142142    else if (cast<Integer>(shiftAmount)->value() == 1) {
    143         return createAdvanceThenScanTo(expr, indexStream);
     143        return createAdvanceThenScanTo(createAnd(expr, indexStream), indexStream);
    144144    }
    145145    MAKE_TERNARY(createIndexedAdvance, TypeId::IndexedAdvance, expr, indexStream, shiftAmount);
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5711 r5712  
    7676 * @brief initializeCarryData
    7777 ** ------------------------------------------------------------------------------------------------------------- */
    78 void CarryManager::initializeCarryData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, PabloKernel * const kernel) {
     78void CarryManager::initializeCarryData(const std::unique_ptr<kernel::KernelBuilder> & b, PabloKernel * const kernel) {
    7979
    8080    // Each scope constructs its own CarryData struct, which will be added to the final "carries" struct
     
    102102    mCarryMetadata.resize(getScopeCount(mCurrentScope));
    103103
    104     Type * const carryStateTy = analyse(iBuilder, mCurrentScope);
     104    Type * const carryStateTy = analyse(b, mCurrentScope);
    105105
    106106    kernel->addScalar(carryStateTy, "carries");
    107107
    108108    if (mHasLoop) {
    109         kernel->addScalar(iBuilder->getInt32Ty(), "selector");
     109        kernel->addScalar(b->getInt32Ty(), "selector");
    110110    }
    111111    if (mHasLongAdvance) {
    112         kernel->addScalar(iBuilder->getSizeTy(), "CarryBlockIndex");
     112        kernel->addScalar(b->getSizeTy(), "CarryBlockIndex");
    113113    }
    114114    for (unsigned i = 0; i < mIndexedLongAdvanceTotal; i++) {
    115         kernel->addScalar(iBuilder->getSizeTy(), "LongAdvancePosition" + std::to_string(i));
     115        kernel->addScalar(b->getSizeTy(), "LongAdvancePosition" + std::to_string(i));
    116116    }
    117117}
     
    183183 * @brief initializeCodeGen
    184184 ** ------------------------------------------------------------------------------------------------------------- */
    185 void CarryManager::initializeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     185void CarryManager::initializeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & b) {
    186186
    187187    assert(!mCarryMetadata.empty());
     
    189189    assert (!mCarryInfo->hasSummary());
    190190
    191     mCurrentFrame = iBuilder->getScalarFieldPtr("carries");
     191    mCurrentFrame = b->getScalarFieldPtr("carries");
    192192    mCurrentFrameIndex = 0;
    193193    mCarryScopes = 0;
     
    198198    assert (mCarrySummaryStack.empty());
    199199
    200     Type * const carryTy = iBuilder->getBitBlockType();
     200    Type * const carryTy = b->getBitBlockType();
    201201
    202202    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
    203203
    204204    if (mHasLoop) {       
    205         mLoopSelector = iBuilder->getScalarField("selector");
    206         mNextLoopSelector = iBuilder->CreateXor(mLoopSelector, ConstantInt::get(mLoopSelector->getType(), 1));
     205        mLoopSelector = b->getScalarField("selector");
     206        mNextLoopSelector = b->CreateXor(mLoopSelector, ConstantInt::get(mLoopSelector->getType(), 1));
    207207    }
    208208
     
    212212 * @brief finalizeCodeGen
    213213 ** ------------------------------------------------------------------------------------------------------------- */
    214 void CarryManager::finalizeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     214void CarryManager::finalizeCodeGen(const std::unique_ptr<kernel::KernelBuilder> & b) {
    215215    if (mHasLoop) {
    216         iBuilder->setScalarField("selector", mNextLoopSelector);
     216        b->setScalarField("selector", mNextLoopSelector);
    217217    }
    218218    if (mHasLongAdvance) {
    219         Value * idx = iBuilder->getScalarField("CarryBlockIndex");
    220         idx = iBuilder->CreateAdd(idx, iBuilder->getSize(1));
    221         iBuilder->setScalarField("CarryBlockIndex", idx);
     219        Value * idx = b->getScalarField("CarryBlockIndex");
     220        idx = b->CreateAdd(idx, b->getSize(1));
     221        b->setScalarField("CarryBlockIndex", idx);
    222222    }
    223223    assert (mCarryFrameStack.empty());   
     
    232232 * @brief enterLoopScope
    233233 ** ------------------------------------------------------------------------------------------------------------- */
    234 void CarryManager::enterLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
     234void CarryManager::enterLoopScope(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope) {
    235235    assert (scope);
    236236    assert (mHasLoop);
    237237    ++mLoopDepth;
    238     enterScope(iBuilder, scope);
     238    enterScope(b, scope);
    239239}
    240240
     
    242242 * @brief enterLoopBody
    243243 ** ------------------------------------------------------------------------------------------------------------- */
    244 void CarryManager::enterLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const entryBlock) {
     244void CarryManager::enterLoopBody(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const entryBlock) {
    245245    if (mCarryInfo->hasSummary()) {
    246         Type * const carryTy = iBuilder->getBitBlockType();
    247         PHINode * phiCarryOutSummary = iBuilder->CreatePHI(carryTy, 2, "summary");
     246        Type * const carryTy = b->getBitBlockType();
     247        PHINode * phiCarryOutSummary = b->CreatePHI(carryTy, 2, "summary");
    248248        assert (!mCarrySummaryStack.empty());
    249249        phiCarryOutSummary->addIncoming(mCarrySummaryStack.back(), entryBlock);
     
    259259        assert (mCarryInfo->hasSummary());
    260260
    261         Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    262         Type * const carryTy = iBuilder->getBitBlockType();
     261        Type * const int8PtrTy = b->getInt8PtrTy();
     262        Type * const carryTy = b->getBitBlockType();
    263263        PointerType * const carryPtrTy = carryTy->getPointerTo();
    264264
    265265        // Check whether we need to resize the carry state
    266         PHINode * index = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     266        PHINode * index = b->CreatePHI(b->getSizeTy(), 2);
    267267        mLoopIndicies.push_back(index);
    268         index->addIncoming(iBuilder->getSize(0), entryBlock);
    269         Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    270         Value * capacity = iBuilder->CreateLoad(capacityPtr, "capacity");
     268        index->addIncoming(b->getSize(0), entryBlock);
     269        Value * capacityPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(0)});
     270        Value * capacity = b->CreateLoad(capacityPtr, "capacity");
    271271        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
    272         Value * arrayPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    273         Value * array = iBuilder->CreateLoad(arrayPtr, "array");
    274         BasicBlock * const entry = iBuilder->GetInsertBlock();
    275         BasicBlock * const resizeCarryState = iBuilder->CreateBasicBlock("ResizeCarryState");
    276         BasicBlock * const reallocExisting = iBuilder->CreateBasicBlock("ReallocExisting");
    277         BasicBlock * const createNew = iBuilder->CreateBasicBlock("CreateNew");
    278         BasicBlock * const resumeKernel = iBuilder->CreateBasicBlock("ResumeKernel");
    279         iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(index, capacity), resumeKernel, resizeCarryState);
     272        Value * arrayPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(1)});
     273        Value * array = b->CreateLoad(arrayPtr, "array");
     274        BasicBlock * const entry = b->GetInsertBlock();
     275        BasicBlock * const resizeCarryState = b->CreateBasicBlock("ResizeCarryState");
     276        BasicBlock * const reallocExisting = b->CreateBasicBlock("ReallocExisting");
     277        BasicBlock * const createNew = b->CreateBasicBlock("CreateNew");
     278        BasicBlock * const resumeKernel = b->CreateBasicBlock("ResumeKernel");
     279        b->CreateLikelyCondBr(b->CreateICmpNE(index, capacity), resumeKernel, resizeCarryState);
    280280
    281281        // RESIZE CARRY BLOCK
    282         iBuilder->SetInsertPoint(resizeCarryState);
    283         const auto BlockWidth = iBuilder->getBitBlockWidth() / 8;
     282        b->SetInsertPoint(resizeCarryState);
     283        const auto BlockWidth = b->getBitBlockWidth() / 8;
    284284        const auto Log2BlockWidth = floor_log2(BlockWidth);
    285         Constant * const carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(array->getType()->getPointerElementType()), iBuilder->getSizeTy(), false);
    286         Value * const summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
    287         Value * const hasCarryState = iBuilder->CreateICmpNE(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
    288         iBuilder->CreateLikelyCondBr(hasCarryState, reallocExisting, createNew);
     285        Constant * const carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(array->getType()->getPointerElementType()), b->getSizeTy(), false);
     286        Value * const summaryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(2)});
     287        Value * const hasCarryState = b->CreateICmpNE(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
     288        b->CreateLikelyCondBr(hasCarryState, reallocExisting, createNew);
    289289
    290290        // REALLOCATE EXISTING
    291         iBuilder->SetInsertPoint(reallocExisting);
    292         Value * const capacitySize = iBuilder->CreateMul(capacity, carryStateWidth);
    293         Value * const newCapacitySize = iBuilder->CreateShl(capacitySize, 1); // x 2
    294         Value * const newArray = iBuilder->CreateCacheAlignedMalloc(newCapacitySize);
    295         iBuilder->CreateMemCpy(newArray, array, capacitySize, iBuilder->getCacheAlignment());
    296         iBuilder->CreateFree(array);
    297         iBuilder->CreateStore(newArray, arrayPtr);
    298         Value * const startNewArrayPtr = iBuilder->CreateGEP(iBuilder->CreatePointerCast(newArray, int8PtrTy), capacitySize);
    299         iBuilder->CreateMemZero(startNewArrayPtr, capacitySize, BlockWidth);
    300         Value * const newCapacity = iBuilder->CreateShl(capacity, 1);
    301         iBuilder->CreateStore(newCapacity, capacityPtr);
    302         Value * const summary = iBuilder->CreateLoad(summaryPtr, false);
    303         Value * const summarySize = iBuilder->CreateShl(iBuilder->CreateAdd(iBuilder->CreateCeilLog2(capacity), ONE), Log2BlockWidth + 1);
    304         Constant * const additionalSpace = iBuilder->getSize(2 * BlockWidth);
    305         Value * const newSummarySize = iBuilder->CreateAdd(summarySize, additionalSpace);
    306         Value * const newSummary = iBuilder->CreateBlockAlignedMalloc(newSummarySize);
    307         iBuilder->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
    308         iBuilder->CreateFree(summary);
    309         iBuilder->CreateStore(iBuilder->CreatePointerCast(newSummary, carryPtrTy), summaryPtr);
    310         Value * const startNewSummaryPtr = iBuilder->CreateGEP(iBuilder->CreatePointerCast(newSummary, int8PtrTy), summarySize);
    311         iBuilder->CreateMemZero(startNewSummaryPtr, additionalSpace, BlockWidth);
    312         iBuilder->CreateBr(resumeKernel);
     291        b->SetInsertPoint(reallocExisting);
     292        Value * const capacitySize = b->CreateMul(capacity, carryStateWidth);
     293        Value * const newCapacitySize = b->CreateShl(capacitySize, 1); // x 2
     294        Value * const newArray = b->CreateCacheAlignedMalloc(newCapacitySize);
     295        b->CreateMemCpy(newArray, array, capacitySize, b->getCacheAlignment());
     296        b->CreateFree(array);
     297        b->CreateStore(newArray, arrayPtr);
     298        Value * const startNewArrayPtr = b->CreateGEP(b->CreatePointerCast(newArray, int8PtrTy), capacitySize);
     299        b->CreateMemZero(startNewArrayPtr, capacitySize, BlockWidth);
     300        Value * const newCapacity = b->CreateShl(capacity, 1);
     301        b->CreateStore(newCapacity, capacityPtr);
     302        Value * const summary = b->CreateLoad(summaryPtr, false);
     303        Value * const summarySize = b->CreateShl(b->CreateAdd(b->CreateCeilLog2(capacity), ONE), Log2BlockWidth + 1);
     304        Constant * const additionalSpace = b->getSize(2 * BlockWidth);
     305        Value * const newSummarySize = b->CreateAdd(summarySize, additionalSpace);
     306        Value * const newSummary = b->CreateBlockAlignedMalloc(newSummarySize);
     307        b->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
     308        b->CreateFree(summary);
     309        b->CreateStore(b->CreatePointerCast(newSummary, carryPtrTy), summaryPtr);
     310        Value * const startNewSummaryPtr = b->CreateGEP(b->CreatePointerCast(newSummary, int8PtrTy), summarySize);
     311        b->CreateMemZero(startNewSummaryPtr, additionalSpace, BlockWidth);
     312        b->CreateBr(resumeKernel);
    313313
    314314        // CREATE NEW
    315         iBuilder->SetInsertPoint(createNew);
    316         Constant * const initialLog2Capacity = iBuilder->getInt64(4);
     315        b->SetInsertPoint(createNew);
     316        Constant * const initialLog2Capacity = b->getInt64(4);
    317317        Constant * const initialCapacity = ConstantExpr::getShl(ONE, initialLog2Capacity);
    318         iBuilder->CreateStore(initialCapacity, capacityPtr);
     318        b->CreateStore(initialCapacity, capacityPtr);
    319319        Constant * const initialCapacitySize = ConstantExpr::getMul(initialCapacity, carryStateWidth);
    320         Value * initialArray = iBuilder->CreateCacheAlignedMalloc(initialCapacitySize);
    321         iBuilder->CreateMemZero(initialArray, initialCapacitySize, BlockWidth);
    322         initialArray = iBuilder->CreatePointerCast(initialArray, array->getType());
    323         iBuilder->CreateStore(initialArray, arrayPtr);
    324         Constant * initialSummarySize = ConstantExpr::getShl(ConstantExpr::getAdd(initialLog2Capacity, iBuilder->getInt64(1)), iBuilder->getInt64(Log2BlockWidth + 1));
    325         Value * initialSummary = iBuilder->CreateBlockAlignedMalloc(initialSummarySize);
    326         iBuilder->CreateMemZero(initialSummary, initialSummarySize, BlockWidth);
    327         initialSummary = iBuilder->CreatePointerCast(initialSummary, carryPtrTy);
    328         iBuilder->CreateStore(initialSummary, summaryPtr);
    329         iBuilder->CreateBr(resumeKernel);
     320        Value * initialArray = b->CreateCacheAlignedMalloc(initialCapacitySize);
     321        b->CreateMemZero(initialArray, initialCapacitySize, BlockWidth);
     322        initialArray = b->CreatePointerCast(initialArray, array->getType());
     323        b->CreateStore(initialArray, arrayPtr);
     324        Constant * initialSummarySize = ConstantExpr::getShl(ConstantExpr::getAdd(initialLog2Capacity, b->getInt64(1)), b->getInt64(Log2BlockWidth + 1));
     325        Value * initialSummary = b->CreateBlockAlignedMalloc(initialSummarySize);
     326        b->CreateMemZero(initialSummary, initialSummarySize, BlockWidth);
     327        initialSummary = b->CreatePointerCast(initialSummary, carryPtrTy);
     328        b->CreateStore(initialSummary, summaryPtr);
     329        b->CreateBr(resumeKernel);
    330330
    331331        // RESUME KERNEL
    332         iBuilder->SetInsertPoint(resumeKernel);
    333         PHINode * phiArrayPtr = iBuilder->CreatePHI(array->getType(), 3);
     332        b->SetInsertPoint(resumeKernel);
     333        PHINode * phiArrayPtr = b->CreatePHI(array->getType(), 3);
    334334        phiArrayPtr->addIncoming(array, entry);
    335335        phiArrayPtr->addIncoming(initialArray, createNew);
     
    338338        // NOTE: the 3 here is only to pass the assertion later. It refers to the number of elements in the carry data struct.
    339339        mCarryFrameStack.emplace_back(mCurrentFrame, 3);
    340         mCurrentFrame = iBuilder->CreateGEP(phiArrayPtr, index);
     340        mCurrentFrame = b->CreateGEP(phiArrayPtr, index);
    341341    }
    342342}
     
    345345 * @brief leaveLoopBody
    346346 ** ------------------------------------------------------------------------------------------------------------- */
    347 void CarryManager::leaveLoopBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * /* exitBlock */) {
    348 
    349     Type * const carryTy = iBuilder->getBitBlockType();
     347void CarryManager::leaveLoopBody(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * /* exitBlock */) {
     348
     349    Type * const carryTy = b->getBitBlockType();
    350350
    351351    if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
     
    353353        assert (mCarryInfo->hasSummary());
    354354
    355         ConstantInt * const summaryIndex = iBuilder->getInt32(mCarryInfo->hasExplicitSummary() ? mCurrentFrameIndex : (mCurrentFrameIndex - 1));
    356 
    357         Value * const carryInAccumulator = readCarryInSummary(iBuilder, summaryIndex);
     355        ConstantInt * const summaryIndex = b->getInt32(mCarryInfo->hasExplicitSummary() ? mCurrentFrameIndex : (mCurrentFrameIndex - 1));
     356
     357        Value * const carryInAccumulator = readCarryInSummary(b, summaryIndex);
    358358        Value * const carryOutAccumulator = mCarrySummaryStack.back();
    359359
    360360        if (mCarryInfo->hasExplicitSummary()) {
    361             writeCarryOutSummary(iBuilder, carryOutAccumulator, summaryIndex);
     361            writeCarryOutSummary(b, carryOutAccumulator, summaryIndex);
    362362        }
    363363
     
    375375        // Otherwise we will end up with an incorrect result or being trapped in an infinite loop.
    376376
    377         Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    378         Value * capacity = iBuilder->CreateLoad(capacityPtr, false);
    379         Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
    380         Value * summary = iBuilder->CreateLoad(summaryPtr, false);
     377        Value * capacityPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(0)});
     378        Value * capacity = b->CreateLoad(capacityPtr, false);
     379        Value * summaryPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(2)});
     380        Value * summary = b->CreateLoad(summaryPtr, false);
    381381
    382382        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
    383383
    384         Value * loopSelector = iBuilder->CreateZExt(mLoopSelector, capacity->getType());
    385 
    386         BasicBlock * entry = iBuilder->GetInsertBlock();
    387         BasicBlock * update = iBuilder->CreateBasicBlock("UpdateNonCarryCollapsingSummary");
    388         BasicBlock * resume = iBuilder->CreateBasicBlock("ResumeAfterUpdatingNonCarryCollapsingSummary");
    389 
    390         iBuilder->CreateBr(update);
    391 
    392         iBuilder->SetInsertPoint(update);
    393         PHINode * i = iBuilder->CreatePHI(capacity->getType(), 2);
     384        Value * loopSelector = b->CreateZExt(mLoopSelector, capacity->getType());
     385
     386        BasicBlock * entry = b->GetInsertBlock();
     387        BasicBlock * update = b->CreateBasicBlock("UpdateNonCarryCollapsingSummary");
     388        BasicBlock * resume = b->CreateBasicBlock("ResumeAfterUpdatingNonCarryCollapsingSummary");
     389
     390        b->CreateBr(update);
     391
     392        b->SetInsertPoint(update);
     393        PHINode * i = b->CreatePHI(capacity->getType(), 2);
    394394        i->addIncoming(ConstantInt::getNullValue(capacity->getType()), entry);
    395         PHINode * const borrow = iBuilder->CreatePHI(carryInAccumulator->getType(), 2);
     395        PHINode * const borrow = b->CreatePHI(carryInAccumulator->getType(), 2);
    396396        borrow->addIncoming(carryInAccumulator, entry);
    397         PHINode * const carry = iBuilder->CreatePHI(carryOutAccumulator->getType(), 2);
     397        PHINode * const carry = b->CreatePHI(carryOutAccumulator->getType(), 2);
    398398        carry->addIncoming(carryOutAccumulator, entry);
    399399        // OR the updated carry in summary later for the summaryTest
    400         PHINode * const carryInSummary = iBuilder->CreatePHI(carryTy, 2);
     400        PHINode * const carryInSummary = b->CreatePHI(carryTy, 2);
    401401        carryInSummary->addIncoming(Constant::getNullValue(carryTy), entry);
    402402
    403403        // half subtractor
    404         Value * const carryInOffset = iBuilder->CreateOr(iBuilder->CreateShl(i, 1), loopSelector);
    405         Value * const carryInPtr = iBuilder->CreateGEP(summary, carryInOffset);
    406         Value * const carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
    407         Value * const nextCarryIn = iBuilder->CreateXor(carryIn, borrow);
    408         Value * const nextSummary = iBuilder->CreateOr(carryInSummary, nextCarryIn);
    409 
    410         iBuilder->CreateBlockAlignedStore(nextCarryIn, carryInPtr);
     404        Value * const carryInOffset = b->CreateOr(b->CreateShl(i, 1), loopSelector);
     405        Value * const carryInPtr = b->CreateGEP(summary, carryInOffset);
     406        Value * const carryIn = b->CreateBlockAlignedLoad(carryInPtr);
     407        Value * const nextCarryIn = b->CreateXor(carryIn, borrow);
     408        Value * const nextSummary = b->CreateOr(carryInSummary, nextCarryIn);
     409
     410        b->CreateBlockAlignedStore(nextCarryIn, carryInPtr);
    411411        carryInSummary->addIncoming(nextSummary, update);
    412         Value * finalBorrow = iBuilder->CreateAnd(iBuilder->CreateNot(carryIn), borrow);
     412        Value * finalBorrow = b->CreateAnd(b->CreateNot(carryIn), borrow);
    413413        borrow->addIncoming(finalBorrow, update);
    414414
    415415        // half adder
    416         Value * const carryOutOffset = iBuilder->CreateXor(carryInOffset, ConstantInt::get(carryInOffset->getType(), 1));
    417         Value * const carryOutPtr = iBuilder->CreateGEP(summary, carryOutOffset);
    418         Value * const carryOut = iBuilder->CreateBlockAlignedLoad(carryOutPtr);
    419         Value * const nextCarryOut = iBuilder->CreateXor(carryOut, carry);
    420 
    421         iBuilder->CreateBlockAlignedStore(nextCarryOut, carryOutPtr);
    422         Value * finalCarry = iBuilder->CreateAnd(carryOut, carry);
     416        Value * const carryOutOffset = b->CreateXor(carryInOffset, ConstantInt::get(carryInOffset->getType(), 1));
     417        Value * const carryOutPtr = b->CreateGEP(summary, carryOutOffset);
     418        Value * const carryOut = b->CreateBlockAlignedLoad(carryOutPtr);
     419        Value * const nextCarryOut = b->CreateXor(carryOut, carry);
     420
     421        b->CreateBlockAlignedStore(nextCarryOut, carryOutPtr);
     422        Value * finalCarry = b->CreateAnd(carryOut, carry);
    423423        carry->addIncoming(finalCarry, update);
    424424
    425425        // loop condition
    426         i->addIncoming(iBuilder->CreateAdd(i, ONE), update);
    427         iBuilder->CreateCondBr(iBuilder->CreateICmpNE(iBuilder->CreateShl(ONE, i), capacity), update, resume);
    428 
    429         iBuilder->SetInsertPoint(resume);
     426        i->addIncoming(b->CreateAdd(i, ONE), update);
     427        b->CreateCondBr(b->CreateICmpNE(b->CreateShl(ONE, i), capacity), update, resume);
     428
     429        b->SetInsertPoint(resume);
    430430
    431431        if (codegen::EnableAsserts) {
    432             iBuilder->CreateAssertZero(iBuilder->CreateOr(finalBorrow, finalCarry),
     432            b->CreateAssertZero(b->CreateOr(finalBorrow, finalCarry),
    433433                                       "CarryManager: loop post-condition violated: final borrow and carry must be zero!");
    434434        }
     
    436436        assert (!mLoopIndicies.empty());
    437437        PHINode * index = mLoopIndicies.back();
    438         index->addIncoming(iBuilder->CreateAdd(index, iBuilder->getSize(1)), resume);
     438        index->addIncoming(b->CreateAdd(index, b->getSize(1)), resume);
    439439        mLoopIndicies.pop_back();
    440440
     
    446446        mCarrySummaryStack.pop_back();
    447447        PHINode * phiCarryOut = cast<PHINode>(mCarrySummaryStack.back());
    448         phiCarryOut->addIncoming(carryOut, iBuilder->GetInsertBlock());
     448        phiCarryOut->addIncoming(carryOut, b->GetInsertBlock());
    449449        // If we're returning to the base scope, reset our accumulated summary value.
    450450        if (n == 2) {
     
    458458 * @brief leaveLoopScope
    459459 ** ------------------------------------------------------------------------------------------------------------- */
    460 void CarryManager::leaveLoopScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
     460void CarryManager::leaveLoopScope(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
    461461    assert (mLoopDepth > 0);
    462462    --mLoopDepth;
    463     leaveScope(iBuilder);
     463    leaveScope(b);
    464464}
    465465
     
    467467 * @brief enterIfScope
    468468 ** ------------------------------------------------------------------------------------------------------------- */
    469 void CarryManager::enterIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
     469void CarryManager::enterIfScope(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope) {
    470470    ++mIfDepth;
    471     enterScope(iBuilder, scope);
     471    enterScope(b, scope);
    472472    // We zero-initialized the nested summary value and later OR in the current summary into the escaping summary
    473473    // so that upon processing the subsequent block iteration, we branch into this If scope iff a carry out was
     
    475475    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
    476476        assert (mCurrentFrameIndex == 0);
    477         mNextSummaryTest = readCarryInSummary(iBuilder, iBuilder->getInt32(0));
     477        mNextSummaryTest = readCarryInSummary(b, b->getInt32(0));
    478478        if (mCarryInfo->hasExplicitSummary()) {
    479479            mCurrentFrameIndex = 1;
    480480        }
    481481    }
    482     Type * const carryTy = iBuilder->getBitBlockType();
     482    Type * const carryTy = b->getBitBlockType();
    483483    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
    484484}
     
    487487 * @brief generateSummaryTest
    488488 ** ------------------------------------------------------------------------------------------------------------- */
    489 Value * CarryManager::generateSummaryTest(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * condition) {
     489Value * CarryManager::generateSummaryTest(const std::unique_ptr<kernel::KernelBuilder> & b, Value * condition) {
    490490    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
    491491        assert ("summary test was not generated" && mNextSummaryTest);
    492         condition = iBuilder->simd_or(condition, mNextSummaryTest);
     492        condition = b->simd_or(condition, mNextSummaryTest);
    493493        mNextSummaryTest = nullptr;
    494494    }
     
    500500 * @brief enterIfBody
    501501 ** ------------------------------------------------------------------------------------------------------------- */
    502 void CarryManager::enterIfBody(const std::unique_ptr<kernel::KernelBuilder> & /* iBuilder */, BasicBlock * const entryBlock) {
     502void CarryManager::enterIfBody(const std::unique_ptr<kernel::KernelBuilder> & /* b */, BasicBlock * const entryBlock) {
    503503    assert (entryBlock);
    504504}
     
    507507 * @brief leaveIfBody
    508508 ** ------------------------------------------------------------------------------------------------------------- */
    509 void CarryManager::leaveIfBody(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const exitBlock) {
     509void CarryManager::leaveIfBody(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const exitBlock) {
    510510    assert (exitBlock);
    511511    const auto n = mCarrySummaryStack.size();
    512512    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
    513         writeCarryOutSummary(iBuilder, mCarrySummaryStack[n - 1], iBuilder->getInt32(0));
     513        writeCarryOutSummary(b, mCarrySummaryStack[n - 1], b->getInt32(0));
    514514    }
    515515    if (n > 2) {
    516         mCarrySummaryStack[n - 1] = iBuilder->CreateOr(mCarrySummaryStack[n - 1], mCarrySummaryStack[n - 2], "summary");
     516        mCarrySummaryStack[n - 1] = b->CreateOr(mCarrySummaryStack[n - 1], mCarrySummaryStack[n - 2], "summary");
    517517    }
    518518}
     
    521521 * @brief leaveIfScope
    522522 ** ------------------------------------------------------------------------------------------------------------- */
    523 void CarryManager::leaveIfScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
     523void CarryManager::leaveIfScope(const std::unique_ptr<kernel::KernelBuilder> & b, BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
    524524    assert (mIfDepth > 0);
    525525    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
     
    531531            Value * outer = mCarrySummaryStack[n - 2];
    532532            assert (nested->getType() == outer->getType());
    533             PHINode * const phi = iBuilder->CreatePHI(nested->getType(), 2, "summary");
     533            PHINode * const phi = b->CreatePHI(nested->getType(), 2, "summary");
    534534            phi->addIncoming(outer, entryBlock);
    535535            phi->addIncoming(nested, exitBlock);
     
    538538    }
    539539    --mIfDepth;
    540     leaveScope(iBuilder);
     540    leaveScope(b);
    541541    mCarrySummaryStack.pop_back();
    542542}
     
    545545 * @brief enterScope
    546546 ** ------------------------------------------------------------------------------------------------------------- */
    547 void CarryManager::enterScope(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope) {
     547void CarryManager::enterScope(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope) {
    548548    assert (scope);
    549549    // Store the state of the current frame and update the scope state
     
    555555    // compilation or a memory corruption has occurred.
    556556    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
    557     mCurrentFrame = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
     557    mCurrentFrame = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex)});
    558558    // Verify we're pointing to a carry frame struct
    559559    assert(mCurrentFrame->getType()->getPointerElementType()->isStructTy());
     
    564564 * @brief leaveScope
    565565 ** ------------------------------------------------------------------------------------------------------------- */
    566 void CarryManager::leaveScope(const std::unique_ptr<kernel::KernelBuilder> & /* iBuilder */) {
     566void CarryManager::leaveScope(const std::unique_ptr<kernel::KernelBuilder> & /* b */) {
    567567
    568568    // Did we use all of the packs in this carry struct?
     
    586586 * @brief addCarryInCarryOut
    587587 ** ------------------------------------------------------------------------------------------------------------- */
    588 Value * CarryManager::addCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Statement * const operation, Value * const e1, Value * const e2) {
     588Value * CarryManager::addCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const Statement * const operation, Value * const e1, Value * const e2) {
    589589    assert (operation && (isNonAdvanceCarryGeneratingStatement(operation)));
    590     Value * const carryIn = getNextCarryIn(iBuilder);
     590    Value * const carryIn = getNextCarryIn(b);
    591591    Value * carryOut, * result;
    592     std::tie(carryOut, result) = iBuilder->bitblock_add_with_carry(e1, e2, carryIn);
    593     setNextCarryOut(iBuilder, carryOut);
    594     assert (result->getType() == iBuilder->getBitBlockType());
     592    std::tie(carryOut, result) = b->bitblock_add_with_carry(e1, e2, carryIn);
     593    setNextCarryOut(b, carryOut);
     594    assert (result->getType() == b->getBitBlockType());
    595595    return result;
    596596}
     
    599599 * @brief advanceCarryInCarryOut
    600600 ** ------------------------------------------------------------------------------------------------------------- */
    601 Value * CarryManager::advanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const Advance * const advance, Value * const value) {
     601Value * CarryManager::advanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const Advance * const advance, Value * const value) {
    602602    const auto shiftAmount = advance->getAmount();
    603603    if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) {
    604         Value * const carryIn = getNextCarryIn(iBuilder);
     604        Value * const carryIn = getNextCarryIn(b);
    605605        Value * carryOut, * result;
    606         std::tie(carryOut, result) = iBuilder->bitblock_advance(value, carryIn, shiftAmount);
    607         setNextCarryOut(iBuilder, carryOut);
    608         assert (result->getType() == iBuilder->getBitBlockType());
     606        std::tie(carryOut, result) = b->bitblock_advance(value, carryIn, shiftAmount);
     607        setNextCarryOut(b, carryOut);
     608        assert (result->getType() == b->getBitBlockType());
    609609        return result;
    610610    } else {
    611         return longAdvanceCarryInCarryOut(iBuilder, value, shiftAmount);
    612     }
    613 }
    614 
     611        return longAdvanceCarryInCarryOut(b, value, shiftAmount);
     612    }
     613}
     614
     615/** ------------------------------------------------------------------------------------------------------------- *
     616 * @brief indexedAdvanceCarryInCarryOut
     617 ** ------------------------------------------------------------------------------------------------------------- */
    615618Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
    616619    const auto shiftAmount = advance->getAmount();
     
    672675        }
    673676        b->CreateBlockAlignedStore(b->CreateBitCast(carry, b->getBitBlockType()), carryInPtr);
     677        if ((mIfDepth > 0) && mCarryInfo->hasExplicitSummary()) {
     678            addToCarryOutSummary(b, strm);
     679        }
    674680        return result;
    675681    } else {
     
    683689 * @brief longAdvanceCarryInCarryOut
    684690 ** ------------------------------------------------------------------------------------------------------------- */
    685 inline Value * CarryManager::longAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const value, const unsigned shiftAmount) {
     691inline Value * CarryManager::longAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const value, const unsigned shiftAmount) {
    686692
    687693    assert (mHasLongAdvance);
     
    689695    assert (value);
    690696
    691     const auto blockWidth = iBuilder->getBitBlockWidth();
    692     Type * const streamTy = iBuilder->getIntNTy(blockWidth);
     697    const auto blockWidth = b->getBitBlockWidth();
     698    Type * const streamTy = b->getIntNTy(blockWidth);
    693699
    694700    Value * indices[3];
    695701
    696     indices[0] = iBuilder->getInt32(0);
     702    indices[0] = b->getInt32(0);
    697703
    698704    if (mIfDepth > 0) {
     
    701707            // TODO: once CEILING(shiftAmount / 256) > 2, consider using a half-adder/subtractor strategy?
    702708
    703             Value * carry = iBuilder->CreateZExt(iBuilder->bitblock_any(value), streamTy);
     709            Value * carry = b->CreateZExt(b->bitblock_any(value), streamTy);
    704710            const auto summaryBlocks = ceil_udiv(shiftAmount, blockWidth);
    705711            const auto summarySize = ceil_udiv(summaryBlocks, blockWidth);
    706             VectorType * const bitBlockTy = iBuilder->getBitBlockType();
     712            VectorType * const bitBlockTy = b->getBitBlockType();
    707713            IntegerType * const laneTy = cast<IntegerType>(bitBlockTy->getVectorElementType());
    708714            const auto laneWidth = laneTy->getIntegerBitWidth();
     
    711717            assert (is_power_2(laneWidth));
    712718
    713             indices[1] = iBuilder->getInt32(mCurrentFrameIndex++);
     719            indices[1] = b->getInt32(mCurrentFrameIndex++);
    714720
    715721            for (unsigned i = 1;;++i) {
     
    717723                assert (i <= summarySize);
    718724
    719                 indices[2] = iBuilder->getInt32(i - 1);
    720 
    721                 Value * const ptr = iBuilder->CreateGEP(mCurrentFrame, indices);
    722                 Value * const prior = iBuilder->CreateBitCast(iBuilder->CreateBlockAlignedLoad(ptr), streamTy);
     725                indices[2] = b->getInt32(i - 1);
     726
     727                Value * const ptr = b->CreateGEP(mCurrentFrame, indices);
     728                Value * const prior = b->CreateBitCast(b->CreateBlockAlignedLoad(ptr), streamTy);
    723729
    724730                Value * advanced = nullptr;
    725731                if (LLVM_LIKELY(summaryBlocks < laneWidth)) {
    726                     advanced = iBuilder->CreateOr(iBuilder->CreateShl(prior, 1), carry);
    727                     carry = iBuilder->CreateLShr(prior, summaryBlocks - 1);
     732                    advanced = b->CreateOr(b->CreateShl(prior, 1), carry);
     733                    carry = b->CreateLShr(prior, summaryBlocks - 1);
    728734                } else {
    729                     std::tie(advanced, carry) = iBuilder->bitblock_advance(prior, carry, 1);
     735                    std::tie(advanced, carry) = b->bitblock_advance(prior, carry, 1);
    730736                }
    731                 Value * stream = iBuilder->CreateBitCast(advanced, bitBlockTy);
     737                Value * stream = b->CreateBitCast(advanced, bitBlockTy);
    732738                if (LLVM_LIKELY(i == summarySize)) {
    733739                    const auto n = bitBlockTy->getVectorNumElements();
     
    741747                        std::fill_n(mask + m + 1, n - m, UndefValue::get(laneTy));
    742748                    }
    743                     stream = iBuilder->CreateAnd(stream, ConstantVector::get(ArrayRef<Constant *>(mask, n)));
    744                     addToCarryOutSummary(iBuilder, stream);
    745                     iBuilder->CreateBlockAlignedStore(stream, ptr);
     749                    stream = b->CreateAnd(stream, ConstantVector::get(ArrayRef<Constant *>(mask, n)));
     750                    addToCarryOutSummary(b, stream);
     751                    b->CreateBlockAlignedStore(stream, ptr);
    746752                    break;
    747753                }
    748                 addToCarryOutSummary(iBuilder, stream);
    749                 iBuilder->CreateBlockAlignedStore(stream, ptr);
     754                addToCarryOutSummary(b, stream);
     755                b->CreateBlockAlignedStore(stream, ptr);
    750756            }
    751757
    752758        } else if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
    753             addToCarryOutSummary(iBuilder, value);
    754         }
    755     }
    756 
    757     indices[1] = iBuilder->getInt32(mCurrentFrameIndex++);
     759            addToCarryOutSummary(b, value);
     760        }
     761    }
     762
     763    indices[1] = b->getInt32(mCurrentFrameIndex++);
    758764
    759765    // special case using a single buffer entry and the carry_out value.
    760766    if (LLVM_LIKELY((shiftAmount < blockWidth) && (mLoopDepth == 0))) {
    761767
    762         indices[2] = indices[0]; // iBuilder->getInt32(0)
     768        indices[2] = indices[0]; // b->getInt32(0)
    763769        assert (cast<ConstantInt>(indices[2])->isNullValue());
    764770
    765         Value * const buffer = iBuilder->CreateGEP(mCurrentFrame, indices);
    766         assert (buffer->getType()->getPointerElementType() == iBuilder->getBitBlockType());
    767         Value * carryIn = iBuilder->CreateBlockAlignedLoad(buffer);
    768 
    769         iBuilder->CreateBlockAlignedStore(value, buffer);
     771        Value * const buffer = b->CreateGEP(mCurrentFrame, indices);
     772        assert (buffer->getType()->getPointerElementType() == b->getBitBlockType());
     773        Value * carryIn = b->CreateBlockAlignedLoad(buffer);
     774
     775        b->CreateBlockAlignedStore(value, buffer);
    770776        /* Very special case - no combine */
    771777        if (LLVM_UNLIKELY(shiftAmount == blockWidth)) {
    772             return iBuilder->CreateBitCast(carryIn, iBuilder->getBitBlockType());
    773         }
    774         Value* block0_shr = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamTy), blockWidth - shiftAmount);
    775         Value* block1_shl = iBuilder->CreateShl(iBuilder->CreateBitCast(value, streamTy), shiftAmount);
    776         return iBuilder->CreateBitCast(iBuilder->CreateOr(block1_shl, block0_shr), iBuilder->getBitBlockType());
     778            return b->CreateBitCast(carryIn, b->getBitBlockType());
     779        }
     780        Value* block0_shr = b->CreateLShr(b->CreateBitCast(carryIn, streamTy), blockWidth - shiftAmount);
     781        Value* block1_shl = b->CreateShl(b->CreateBitCast(value, streamTy), shiftAmount);
     782        return b->CreateBitCast(b->CreateOr(block1_shl, block0_shr), b->getBitBlockType());
    777783    } else { //
    778784        const unsigned blockShift = shiftAmount & (blockWidth - 1);
     
    780786
    781787        // Create a mask to implement circular buffer indexing
    782         Value * indexMask = iBuilder->getSize(nearest_pow2(summaryBlocks) - 1);
    783         Value * blockIndex = iBuilder->getScalarField("CarryBlockIndex");
    784 
    785         Value * carryIndex0 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(summaryBlocks));
    786         indices[2] = iBuilder->CreateAnd(carryIndex0, indexMask);
    787         Value * const carryInPtr = iBuilder->CreateGEP(mCurrentFrame, indices);
    788         Value * carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
    789 
    790         indices[2] = iBuilder->CreateAnd(blockIndex, indexMask);
    791         Value * const carryOutPtr = iBuilder->CreateGEP(mCurrentFrame, indices);
    792         assert (carryIn->getType() == iBuilder->getBitBlockType());
     788        Value * indexMask = b->getSize(nearest_pow2(summaryBlocks) - 1);
     789        Value * blockIndex = b->getScalarField("CarryBlockIndex");
     790
     791        Value * carryIndex0 = b->CreateSub(blockIndex, b->getSize(summaryBlocks));
     792        indices[2] = b->CreateAnd(carryIndex0, indexMask);
     793        Value * const carryInPtr = b->CreateGEP(mCurrentFrame, indices);
     794        Value * carryIn = b->CreateBlockAlignedLoad(carryInPtr);
     795
     796        indices[2] = b->CreateAnd(blockIndex, indexMask);
     797        Value * const carryOutPtr = b->CreateGEP(mCurrentFrame, indices);
     798        assert (carryIn->getType() == b->getBitBlockType());
    793799
    794800        // If the long advance is an exact multiple of BitBlockWidth, we simply return the oldest
    795801        // block in the long advance carry data area.
    796802        if (LLVM_UNLIKELY(blockShift == 0)) {
    797             iBuilder->CreateBlockAlignedStore(value, carryOutPtr);
     803            b->CreateBlockAlignedStore(value, carryOutPtr);
    798804            return carryIn;
    799805        } else { // Otherwise we need to combine data from the two oldest blocks.
    800             Value * const carryIndex1 = iBuilder->CreateSub(blockIndex, iBuilder->getSize(summaryBlocks - 1));
    801             indices[2] = iBuilder->CreateAnd(carryIndex1, indexMask);
    802 
    803             Value * const carryInPtr2 = iBuilder->CreateGEP(mCurrentFrame, indices);
    804             Value * const carryIn2 = iBuilder->CreateBlockAlignedLoad(carryInPtr2);
     806            Value * const carryIndex1 = b->CreateSub(blockIndex, b->getSize(summaryBlocks - 1));
     807            indices[2] = b->CreateAnd(carryIndex1, indexMask);
     808
     809            Value * const carryInPtr2 = b->CreateGEP(mCurrentFrame, indices);
     810            Value * const carryIn2 = b->CreateBlockAlignedLoad(carryInPtr2);
    805811            assert (carryOutPtr->getType()->getPointerElementType() == value->getType());
    806             iBuilder->CreateBlockAlignedStore(value, carryOutPtr);
    807 
    808             Value * const b0 = iBuilder->CreateLShr(iBuilder->CreateBitCast(carryIn, streamTy), blockWidth - blockShift);
    809             Value * const b1 = iBuilder->CreateShl(iBuilder->CreateBitCast(carryIn2, streamTy), blockShift);
    810             return iBuilder->CreateBitCast(iBuilder->CreateOr(b1, b0), iBuilder->getBitBlockType());
     812            b->CreateBlockAlignedStore(value, carryOutPtr);
     813
     814            Value * const b0 = b->CreateLShr(b->CreateBitCast(carryIn, streamTy), blockWidth - blockShift);
     815            Value * const b1 = b->CreateShl(b->CreateBitCast(carryIn2, streamTy), blockShift);
     816            return b->CreateBitCast(b->CreateOr(b1, b0), b->getBitBlockType());
    811817        }
    812818    }
     
    816822 * @brief getNextCarryIn
    817823 ** ------------------------------------------------------------------------------------------------------------- */
    818 Value * CarryManager::getNextCarryIn(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     824Value * CarryManager::getNextCarryIn(const std::unique_ptr<kernel::KernelBuilder> & b) {
    819825    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
    820826    if (mLoopDepth == 0) {
    821         mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex)});
     827        mCarryPackPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex)});
    822828    } else {
    823         mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mLoopSelector});
    824     }
    825     Type * const carryTy = iBuilder->getBitBlockType();
     829        mCarryPackPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), mLoopSelector});
     830    }
     831    Type * const carryTy = b->getBitBlockType();
    826832    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
    827     Value * const carryIn = iBuilder->CreateBlockAlignedLoad(mCarryPackPtr);
     833    Value * const carryIn = b->CreateBlockAlignedLoad(mCarryPackPtr);
    828834    if (mLoopDepth > 0) {
    829         iBuilder->CreateBlockAlignedStore(Constant::getNullValue(carryTy), mCarryPackPtr);
     835        b->CreateBlockAlignedStore(Constant::getNullValue(carryTy), mCarryPackPtr);
    830836    }
    831837    return carryIn;
     
    835841 * @brief setNextCarryOut
    836842 ** ------------------------------------------------------------------------------------------------------------- */
    837 void CarryManager::setNextCarryOut(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * carryOut) {
    838     Type * const carryTy = iBuilder->getBitBlockType();
     843void CarryManager::setNextCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, Value * carryOut) {
     844    Type * const carryTy = b->getBitBlockType();
    839845    assert (mCurrentFrameIndex < mCurrentFrame->getType()->getPointerElementType()->getStructNumElements());
    840     carryOut = iBuilder->CreateBitCast(carryOut, carryTy);
     846    carryOut = b->CreateBitCast(carryOut, carryTy);
    841847    if (mCarryInfo->hasSummary()) {
    842         addToCarryOutSummary(iBuilder, carryOut);
     848        addToCarryOutSummary(b, carryOut);
    843849    }
    844850    if (mLoopDepth != 0) {
    845         mCarryPackPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(mCurrentFrameIndex), mNextLoopSelector});
     851        mCarryPackPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex), mNextLoopSelector});
    846852        if (LLVM_LIKELY(!mCarryInfo->nonCarryCollapsingMode())) {
    847             Value * accum = iBuilder->CreateBlockAlignedLoad(mCarryPackPtr);
    848             carryOut = iBuilder->CreateOr(carryOut, accum);
     853            Value * accum = b->CreateBlockAlignedLoad(mCarryPackPtr);
     854            carryOut = b->CreateOr(carryOut, accum);
    849855        }
    850856    }
    851857    ++mCurrentFrameIndex;
    852858    assert (mCarryPackPtr->getType()->getPointerElementType() == carryTy);
    853     iBuilder->CreateBlockAlignedStore(carryOut, mCarryPackPtr);
     859    b->CreateBlockAlignedStore(carryOut, mCarryPackPtr);
    854860}
    855861
     
    857863 * @brief readCarryInSummary
    858864 ** ------------------------------------------------------------------------------------------------------------- */
    859 Value * CarryManager::readCarryInSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, ConstantInt * index) const {
     865Value * CarryManager::readCarryInSummary(const std::unique_ptr<kernel::KernelBuilder> & b, ConstantInt * index) const {
    860866    assert (mCarryInfo->hasSummary());
    861867    unsigned count = 2;
     
    871877    const unsigned length = (mLoopDepth == 0) ? count : (count + 1);
    872878    Value * indicies[length];
    873     std::fill(indicies, indicies + count - 1, iBuilder->getInt32(0));
     879    std::fill(indicies, indicies + count - 1, b->getInt32(0));
    874880    indicies[count - 1] = index;
    875881    if (mLoopDepth != 0) {
     
    878884
    879885    ArrayRef<Value *> ar(indicies, length);
    880     Value * const ptr = iBuilder->CreateGEP(mCurrentFrame, ar);
    881     Value * const summary = iBuilder->CreateBlockAlignedLoad(ptr);
     886    Value * const ptr = b->CreateGEP(mCurrentFrame, ar);
     887    Value * const summary = b->CreateBlockAlignedLoad(ptr);
    882888    if (mLoopDepth != 0 && mCarryInfo->hasExplicitSummary()) {
    883         Type * const carryTy = iBuilder->getBitBlockType();
    884         iBuilder->CreateBlockAlignedStore(Constant::getNullValue(carryTy), ptr);
     889        Type * const carryTy = b->getBitBlockType();
     890        b->CreateBlockAlignedStore(Constant::getNullValue(carryTy), ptr);
    885891    }
    886892    return summary;
     
    890896 * @brief writeCarryOutSummary
    891897 ** ------------------------------------------------------------------------------------------------------------- */
    892 inline void CarryManager::writeCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const summary, ConstantInt * index) const {
     898inline void CarryManager::writeCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const summary, ConstantInt * index) const {
    893899    Value * ptr = nullptr;
    894900    assert (mCarryInfo->hasExplicitSummary());
    895901    if (mLoopDepth > 0) {
    896         ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index, mNextLoopSelector});
     902        ptr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), index, mNextLoopSelector});
    897903    } else {
    898         ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), index});
    899     }
    900     iBuilder->CreateBlockAlignedStore(summary, ptr);
     904        ptr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), index});
     905    }
     906    b->CreateBlockAlignedStore(summary, ptr);
    901907}
    902908
     
    904910 * @brief addToCarryOutSummary
    905911 ** ------------------------------------------------------------------------------------------------------------- */
    906 inline void CarryManager::addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, Value * const value) {
     912inline void CarryManager::addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const value) {
    907913    assert ("cannot add null summary value!" && value);   
    908914    assert ("summary stack is empty!" && !mCarrySummaryStack.empty());
    909915    assert (mCarryInfo->hasSummary());
    910     mCarrySummaryStack.back() = iBuilder->CreateOr(value, mCarrySummaryStack.back());
     916    mCarrySummaryStack.back() = b->CreateOr(value, mCarrySummaryStack.back());
    911917}
    912918
     
    957963 * @brief analyse
    958964 ** ------------------------------------------------------------------------------------------------------------- */
    959 StructType * CarryManager::analyse(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const PabloBlock * const scope,
     965StructType * CarryManager::analyse(const std::unique_ptr<kernel::KernelBuilder> & b, const PabloBlock * const scope,
    960966                                   const unsigned ifDepth, const unsigned loopDepth, const bool isNestedWithinNonCarryCollapsingLoop) {
    961967    assert ("scope cannot be null!" && scope);
     
    963969            && (mCarryScopes == 0 ? (scope == mKernel->getEntryBlock()) : (scope != mKernel->getEntryBlock())));
    964970    assert (mCarryScopes < mCarryMetadata.size());
    965     Type * const carryTy = iBuilder->getBitBlockType();
    966     Type * const blockTy = iBuilder->getBitBlockType();
     971    Type * const carryTy = b->getBitBlockType();
     972    Type * const blockTy = b->getBitBlockType();
    967973
    968974    const unsigned carryScopeIndex = mCarryScopes++;
     
    977983            Type * type = carryPackType;
    978984            if (LLVM_UNLIKELY(amount >= LONG_ADVANCE_BREAKPOINT)) {
    979                 const auto blockWidth = iBuilder->getBitBlockWidth();
     985                const auto blockWidth = b->getBitBlockWidth();
    980986                const auto blocks = ceil_udiv(amount, blockWidth);
    981987                type = ArrayType::get(blockTy, nearest_pow2(blocks + ((loopDepth != 0) ? 1 : 0)));
     
    992998            state.push_back(carryPackType);
    993999        } else if (LLVM_UNLIKELY(isa<If>(stmt))) {
    994             state.push_back(analyse(iBuilder, cast<If>(stmt)->getBody(), ifDepth + 1, loopDepth, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
     1000            state.push_back(analyse(b, cast<If>(stmt)->getBody(), ifDepth + 1, loopDepth, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
    9951001        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
    9961002            mHasLoop = true;
    997             state.push_back(analyse(iBuilder, cast<While>(stmt)->getBody(), ifDepth, loopDepth + 1, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
     1003            state.push_back(analyse(b, cast<While>(stmt)->getBody(), ifDepth, loopDepth + 1, nonCarryCollapsingMode | isNestedWithinNonCarryCollapsingLoop));
    9981004        }
    9991005    }
     
    10031009    CarryData::SummaryType summaryType = CarryData::NoSummary;
    10041010    if (LLVM_UNLIKELY(state.empty())) {
    1005         carryState = StructType::get(iBuilder->getContext());
     1011        carryState = StructType::get(b->getContext());
    10061012    } else {
    10071013        // do we have a summary or a sequence of nested empty structs?
     
    10211027            }
    10221028        }
    1023         carryState = StructType::get(iBuilder->getContext(), state);
     1029        carryState = StructType::get(b->getContext(), state);
    10241030        // If we're in a loop and cannot use collapsing carry mode, convert the carry state struct into a capacity,
    10251031        // carry state pointer, and summary pointer struct.
    10261032        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
    10271033            mHasNonCarryCollapsingLoops = true;
    1028             carryState = StructType::get(iBuilder->getSizeTy(), carryState->getPointerTo(), carryTy->getPointerTo(), nullptr);
     1034            carryState = StructType::get(b->getSizeTy(), carryState->getPointerTo(), carryTy->getPointerTo(), nullptr);
    10291035            assert (isDynamicallyAllocatedType(carryState));
    10301036        }
  • icGREP/icgrep-devel/icgrep/pablo/carrypack_manager.cpp

    r5710 r5712  
    626626
    627627
     628/** ------------------------------------------------------------------------------------------------------------- *
     629 * @brief indexedAdvanceCarryInCarryOut
     630 ** ------------------------------------------------------------------------------------------------------------- */
    628631Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) {
    629632    const auto shiftAmount = advance->getAmount();
    630     if (LLVM_LIKELY(shiftAmount < mElementWidth)) {
     633    Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
     634    Value * PEXT_f = nullptr;
     635    Value * PDEP_f = nullptr;
     636    unsigned bitWidth = sizeof(size_t) * 8;
     637    if (bitWidth == 64) {
     638        PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
     639        PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
     640    }
     641    else if ((bitWidth == 32)  && (shiftAmount < 32)) {
     642        PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
     643        PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
     644    }
     645    else {
     646        llvm::report_fatal_error("indexed_advance unsupported bit width");
     647    }
     648    if (LLVM_LIKELY(shiftAmount < bitWidth)) {
    631649        Value * const carryIn = getNextCarryIn(b);
    632         unsigned bitWidth = sizeof(size_t) * 8;
    633         Value * popcount_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::ctpop, b->getSizeTy());
    634         Value * PEXT_f = nullptr;
    635         Value * PDEP_f = nullptr;
    636         if (bitWidth == 64) {
    637             PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_64);
    638             PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
    639         }
    640         else if ((bitWidth == 32)  && (shiftAmount < 32)) {
    641             PEXT_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pext_32);
    642             PDEP_f = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
    643         }
    644         else {
    645             llvm::report_fatal_error("indexed_advance unsupported bit width");
    646         }
    647650        Value * shiftVal = b->getSize(shiftAmount);
    648651        Value * carry = b->mvmd_extract(bitWidth, carryIn, 0);
     
    656659            Value * popcount_small = b->CreateICmpULT(ix_popcnt, shiftVal);
    657660            Value * carry_if_popcount_small =
    658                 b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
    659                             b->CreateLShr(carry, ix_popcnt));
     661            b->CreateOr(b->CreateShl(bits, b->CreateSub(shiftVal, ix_popcnt)),
     662                        b->CreateLShr(carry, ix_popcnt));
    660663            Value * carry_if_popcount_large = b->CreateLShr(bits, b->CreateSub(ix_popcnt, shiftVal));
    661664            carry = b->CreateSelect(popcount_small, carry_if_popcount_small, carry_if_popcount_large);
     
    665668        setNextCarryOut(b, carryOut);
    666669        return result;
     670    } else if (shiftAmount <= b->getBitBlockWidth()) {
     671        // A single bitblock still holds all the shifted bits.   In this case, we know
     672        // that the shift amount is always greater than the popcount of the individual
     673        // elements that we deal with.   This simplifies some of the logic.
     674        Type * iBitBlock = b->getIntNTy(b->getBitBlockWidth());
     675        Value * carryInPtr = b->CreateGEP(mCurrentFrame, {b->getInt32(0), b->getInt32(mCurrentFrameIndex++), b->getInt32(0)});
     676        Value * carryIn = b->CreateBlockAlignedLoad(carryInPtr);
     677        Value * shiftVal = b->getSize(shiftAmount);
     678        Value * carry = b->CreateBitCast(carryIn, iBitBlock);
     679        Value * result = b->allZeroes();
     680        for (unsigned i = 0; i < b->getBitBlockWidth()/bitWidth; i++) {
     681            Value * s = b->mvmd_extract(bitWidth, strm, i);
     682            Value * ix = b->mvmd_extract(bitWidth, index_strm, i);
     683            Value * ix_popcnt = b->CreateCall(popcount_f, {ix});
     684            Value * bits = b->CreateCall(PEXT_f, {s, ix});  // All these bits are shifted out (appended to carry).
     685            result = b->mvmd_insert(bitWidth, result, b->CreateCall(PDEP_f, {b->mvmd_extract(bitWidth, carry, 0), ix}), i);
     686            carry = b->CreateLShr(carry, b->CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits.
     687            carry = b->CreateOr(carry, b->CreateShl(b->CreateZExt(bits, iBitBlock), b->CreateZExt(b->CreateSub(shiftVal, ix_popcnt), iBitBlock)));
     688        }
     689        b->CreateBlockAlignedStore(b->CreateBitCast(carry, b->getBitBlockType()), carryInPtr);
     690        if ((mIfDepth > 0) && mCarryInfo->hasExplicitSummary()) {
     691            addToCarryOutSummary(b, strm);
     692        }
     693        return result;
    667694    } else {
    668         llvm::report_fatal_error("IndexedAdvance > mElementWidth not yet supported.");
     695        mIndexedLongAdvanceIndex++;
     696        llvm::report_fatal_error("IndexedAdvance > BlockSize not yet supported.");
    669697    }
    670698}
Note: See TracChangeset for help on using the changeset viewer.