Ignore:
Timestamp:
Mar 13, 2017, 3:02:13 PM (2 years ago)
Author:
nmedfort
Message:

Work on non-carry collapsing mode.

Location:
icGREP/icgrep-devel/icgrep
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5353 r5361  
    1313#include <fcntl.h>
    1414#include <toolchain.h>
     15
     16#include <llvm/Support/raw_ostream.h>
    1517
    1618using namespace llvm;
     
    106108}
    107109
    108 Value * CBuilder::CreateMalloc(Type * type, Value * size) {
    109     DataLayout DL(getModule());
     110Value * CBuilder::CreateMalloc(Value * size) {
     111    Module * const m = getModule();
     112    DataLayout DL(m);
    110113    IntegerType * const intTy = getIntPtrTy(DL);
    111114    if (size->getType() != intTy) {
     
    116119        }
    117120    }   
    118     Constant * width = ConstantExpr::getSizeOf(type);
    119     if (LLVM_UNLIKELY(width->getType() != intTy)) {
    120         width = ConstantExpr::getIntegerCast(width, intTy, false);
    121     }
    122     if (!width->isOneValue()) {
    123         if (isa<Constant>(size)) {
    124             size = ConstantExpr::getMul(cast<Constant>(size), width);
    125         } else {
    126             size = CreateMul(size, width);
    127         }
    128     }
    129     Module * const m = getModule();
     121    PointerType * const voidPtrTy = getVoidPtrTy();
    130122    Function * malloc = m->getFunction("malloc");
    131123    if (malloc == nullptr) {
    132         PointerType * const voidPtrTy = getVoidPtrTy();
    133124        FunctionType * fty = FunctionType::get(voidPtrTy, {intTy}, false);
    134125        malloc = Function::Create(fty, Function::ExternalLinkage, "malloc", mMod);
     
    137128    }
    138129    assert (size->getType() == intTy);
    139     CallInst * ci = CreateCall(malloc, size);
     130    CallInst * ci = CreateCall(malloc, size); assert (ci);
    140131    ci->setTailCall();
    141132    ci->setCallingConv(malloc->getCallingConv());
    142     Value * ptr = CreateBitOrPointerCast(ci, type->getPointerTo());
     133    Value * ptr = CreatePointerCast(ci, voidPtrTy); assert (ptr);
    143134    CreateAssert(ptr, "FATAL ERROR: out of memory");
    144135    return ptr;
    145136}
    146137
    147 Value * CBuilder::CreateAlignedMalloc(Type * type, Value * size, const unsigned alignment) {
    148     assert ((alignment & (alignment - 1)) == 0); // is power of 2
    149     DataLayout DL(getModule());
     138Value * CBuilder::CreateAlignedMalloc(Value * size, const unsigned alignment) {
     139    if (LLVM_UNLIKELY((alignment & (alignment - 1)) != 0)) {
     140        report_fatal_error("CreateAlignedMalloc: alignment must be a power of 2");
     141    }
     142    DataLayout DL(mMod);
    150143    IntegerType * const intTy = getIntPtrTy(DL);
    151     if (size->getType() != intTy) {
    152         if (isa<Constant>(size)) {
    153             size = ConstantExpr::getIntegerCast(cast<Constant>(size), intTy, false);
    154         } else {
    155             size = CreateZExtOrTrunc(size, intTy);
    156         }
    157     }
    158     const auto byteWidth = (intTy->getBitWidth() / 8);
    159     Constant * const offset = ConstantInt::get(intTy, alignment + byteWidth - 1);
    160     Constant * width = ConstantExpr::getSizeOf(type);
    161     if (LLVM_UNLIKELY(width->getType() != intTy)) {
    162         width = ConstantExpr::getIntegerCast(width, intTy, false);
    163     }
    164     if (!width->isOneValue()) {
    165         if (isa<Constant>(size)) {
    166             size = ConstantExpr::getMul(cast<Constant>(size), width);
    167         } else {
    168             size = CreateMul(size, width);
    169         }
    170     }
    171     if (isa<Constant>(size)) {
    172         size = ConstantExpr::getAdd(cast<Constant>(size), offset);
    173     } else {
     144    Function * aligned_malloc = mMod->getFunction("aligned_malloc" + std::to_string(alignment));
     145    if (LLVM_UNLIKELY(aligned_malloc == nullptr)) {
     146        const auto ip = saveIP();
     147        PointerType * const voidPtrTy = getVoidPtrTy();
     148        FunctionType * fty = FunctionType::get(voidPtrTy, {intTy}, false);
     149        aligned_malloc = Function::Create(fty, Function::InternalLinkage, "aligned_malloc" + std::to_string(alignment), mMod);
     150        aligned_malloc->setCallingConv(CallingConv::C);
     151        aligned_malloc->setDoesNotAlias(0);
     152        aligned_malloc->addFnAttr(Attribute::AlwaysInline);
     153        Value * size = &*aligned_malloc->arg_begin();
     154        SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", aligned_malloc));
     155        const auto byteWidth = (intTy->getBitWidth() / 8);
     156        Constant * const offset = ConstantInt::get(intTy, alignment + byteWidth - 1);
    174157        size = CreateAdd(size, offset);
    175     }
    176     assert (size->getType() == intTy);
    177     Value * unaligned = CreatePtrToInt(CreateMalloc(getInt8Ty(), size), intTy);
    178     Value * aligned = CreateAnd(CreateAdd(unaligned, offset), ConstantExpr::getNot(ConstantInt::get(intTy, alignment - 1)));
    179     Value * prefix = CreateIntToPtr(CreateSub(aligned, ConstantInt::get(intTy, byteWidth)), intTy->getPointerTo());
    180     assert (unaligned->getType() == prefix->getType()->getPointerElementType());
    181     CreateAlignedStore(unaligned, prefix, byteWidth);
    182     return CreateIntToPtr(aligned, type->getPointerTo());
     158        Value * unaligned = CreatePtrToInt(CreateMalloc(size), intTy);
     159        Value * aligned = CreateAnd(CreateAdd(unaligned, offset), ConstantExpr::getNot(ConstantInt::get(intTy, alignment - 1)));
     160        Value * prefix = CreateIntToPtr(CreateSub(aligned, ConstantInt::get(intTy, byteWidth)), intTy->getPointerTo());
     161        assert (unaligned->getType() == prefix->getType()->getPointerElementType());
     162        CreateAlignedStore(unaligned, prefix, byteWidth);
     163        CreateRet(CreateIntToPtr(aligned, voidPtrTy));
     164        restoreIP(ip);
     165    }
     166    return CreateCall(aligned_malloc, {CreateZExtOrTrunc(size, intTy)});
    183167}
    184168
     
    230214    IntegerType * const intTy = getIntPtrTy(DL);
    231215    PointerType * type = cast<PointerType>(ptr->getType());
    232     Constant * width = ConstantExpr::getSizeOf(type->getPointerElementType());
    233     if (LLVM_UNLIKELY(width->getType() != intTy)) {
    234         width = ConstantExpr::getIntegerCast(width, intTy, false);
    235     }
    236216    if (size->getType() != intTy) {
    237217        if (isa<Constant>(size)) {
     
    239219        } else {
    240220            size = CreateZExtOrTrunc(size, intTy);
    241         }
    242     }
    243     if (!width->isOneValue()) {
    244         if (isa<Constant>(size)) {
    245             size = ConstantExpr::getMul(cast<Constant>(size), width);
    246         } else {
    247             size = CreateMul(size, width);
    248221        }
    249222    }
     
    275248   
    276249}
     250
    277251StoreInst * CBuilder::CreateAtomicStoreRelease(Value * val, Value * ptr) {
    278252    const auto alignment = ptr->getType()->getPointerElementType()->getPrimitiveSizeInBits() / 8;
     
    281255    return inst;
    282256}
    283 
    284257
    285258PointerType * CBuilder::getFILEptrTy() {
     
    390363            Value * len = CreateAdd(sz, getSize(21));
    391364            ConstantInt * _11 = getSize(11);
    392             Value * bytes = CreateMalloc(getInt8Ty(), len);
     365            Value * bytes = CreatePointerCast(CreateMalloc(len), getInt8PtrTy());
    393366            CreateMemCpy(bytes, CreateGlobalStringPtr("Assertion `"), _11, 1);
    394367            CreateMemCpy(CreateGEP(bytes, _11), msg, sz, 1);
     
    424397}
    425398
     399llvm::Value * CBuilder::CreateCeilLog2(llvm::Value * value) {
     400    IntegerType * ty = cast<IntegerType>(value->getType());
     401    CreateAssert(value, "CreateCeilLog2: value cannot be zero");
     402    Value * m = CreateCall(Intrinsic::getDeclaration(mMod, Intrinsic::ctlz, ty), {value, ConstantInt::getFalse(getContext())});
     403    Value * isPowOf2 = CreateICmpEQ(CreateAnd(value, CreateSub(value, ConstantInt::get(ty, 1))), ConstantInt::getNullValue(ty));
     404    m = CreateSub(ConstantInt::get(m->getType(), ty->getBitWidth() - 1), m);
     405    return CreateSelect(isPowOf2, m, CreateAdd(m, ConstantInt::get(m->getType(), 1)));
     406}
     407
    426408CBuilder::CBuilder(Module * const m, const unsigned GeneralRegisterWidthInBits, const bool SupportsIndirectBr, const unsigned CacheLineAlignmentInBytes)
    427409: IRBuilder<>(m->getContext())
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5353 r5361  
    3131        mMod = m;
    3232    }
    33    
    34     llvm::Value * CreateMalloc(llvm::Type * type, llvm::Value * size);
    3533
    36     llvm::Value * CreateAlignedMalloc(llvm::Type * type, llvm::Value * size, const unsigned alignment);
     34    llvm::Value * CreateMalloc(llvm::Value * size);
     35
     36    llvm::Value * CreateAlignedMalloc(llvm::Value * size, const unsigned alignment);
    3737   
    3838    void CreateFree(llvm::Value * const ptr);
     
    123123    }
    124124
     125    llvm::Value * CreateCeilLog2(llvm::Value * value);
     126
    125127protected:
    126     llvm::Module *      mMod;
     128    llvm::Module *      mMod;   
    127129    unsigned            mCacheLineAlignment;
    128130    llvm::IntegerType * mSizeType;
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_target.cpp

    r5329 r5361  
    1515namespace IDISA {
    1616   
    17 IDISA_Builder * GetIDISA_Builder(llvm::Module * mod) {
     17IDISA_Builder * GetIDISA_Builder(llvm::Module * const mod) {
    1818    if (LLVM_UNLIKELY(mod == nullptr)) {
    1919        report_fatal_error("GetIDISA_Builder: module cannot be null");
     
    2222        mod->setTargetTriple(llvm::sys::getProcessTriple());
    2323    }
     24    unsigned registerWidth = 0;
    2425    Triple T(mod->getTargetTriple());
    25     unsigned registerWidth = 32;
    2626    if (T.isArch64Bit()) {
    2727        registerWidth = 64;
     
    4545}
    4646
    47 IDISA_Builder * GetIDISA_GPU_Builder(llvm::Module * mod) {
     47IDISA_Builder * GetIDISA_GPU_Builder(llvm::Module * const mod) {
    4848    return new IDISA_NVPTX20_Builder(mod, 64);
    4949}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_target.h

    r5260 r5361  
    1212namespace IDISA {
    1313   
    14 IDISA::IDISA_Builder * GetIDISA_Builder(llvm::Module * m);
     14IDISA::IDISA_Builder * GetIDISA_Builder(llvm::Module * const m);
    1515
    16 IDISA::IDISA_Builder * GetIDISA_GPU_Builder(llvm::Module * m);
     16IDISA::IDISA_Builder * GetIDISA_GPU_Builder(llvm::Module * const m);
    1717
    1818}
  • icGREP/icgrep-devel/icgrep/array-test.cpp

    r5353 r5361  
    8383        PabloAST * pscan = body.createAdvanceThenScanTo(pending_lparen, in_play, "pscan");
    8484
    85         PabloAST * closed = body.createAnd(pscan, rparen, "closed");
     85        PabloAST * closed = body.createAnd(pscan, rparen, "closed_rparen");
    8686        body.createAssign(all_closed, body.createOr(all_closed, closed));
    8787
     
    9393        body.createAssign(body.createExtract(matches, index), closed);
    9494
    95         PabloAST * pending_rparen = body.createAnd(rparen, body.createNot(all_closed), "pending_rparen");
     95        PabloAST * pending_rparen = body.createAnd(rparen, body.createNot(all_closed, "open_rparen"), "pending_rparen");
    9696        body.createAssign(in_play, body.createOr(pending_lparen, pending_rparen));
    9797        body.createAssign(index, body.createAdd(index, body.getInteger(1)));
     
    123123    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
    124124
    125     kernel::MMapSourceKernel mmapK(iBuilder, segmentSize);
     125    MMapSourceKernel mmapK(iBuilder, segmentSize);
    126126    mmapK.generateKernel({}, {&ByteStream});
    127127    mmapK.setInitialArguments({fileSize});
     
    129129    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    130130
    131     kernel::S2PKernel  s2pk(iBuilder);
     131    S2PKernel  s2pk(iBuilder);
    132132    s2pk.generateKernel({&ByteStream}, {&BasisBits});
    133133
     
    218218}
    219219
    220 
    221220void run(MatchParens match, const std::string & fileName) {
    222221    const boost::filesystem::path file(fileName);
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5357 r5361  
    189189    Value * bufferSize = iBuilder->CreateLoad(iBuilder->CreateGEP(bufferSizesPtr, bid));
    190190
    191     if (CountOnly){
     191    if (CountOnly) {
    192192        Value * strideBlocks = ConstantInt::get(int32ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
    193193        Value * outputThreadPtr = iBuilder->CreateGEP(outputPtr, iBuilder->CreateAdd(iBuilder->CreateMul(bid, strideBlocks), tid));
    194194        Value * result = iBuilder->CreateCall(mainFunc, {inputStream, bufferSize});
    195195        iBuilder->CreateStore(result, outputThreadPtr);
    196     }
    197     else {
     196    } else {
    198197        Type * const outputStremType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 1), 1);
    199198        Value * outputStreamPtr = iBuilder->CreateGEP(iBuilder->CreateBitCast(outputPtr, outputStremType), startBlock);
     
    345344        iBuilder->CreateRet(matchCountK.getScalarField(matchCountK.getInstance(), "matchedLineCount"));
    346345
    347     }
    348     else{
     346    } else {
    349347        kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType);
    350348        scanMatchK.generateKernel({&mergedResults, &LineBreakStream}, {});               
     
    353351        KernelList.push_back(&scanMatchK);
    354352
    355         if (pipelineParallel){
     353        if (pipelineParallel) {
    356354            generatePipelineParallel(iBuilder, KernelList);
    357         } else if (segmentPipelineParallel){
     355        } else if (segmentPipelineParallel) {
    358356            generateSegmentParallelPipeline(iBuilder, KernelList);
    359         }  else{
     357        } else {
    360358            generatePipelineLoop(iBuilder, KernelList);
    361359        }
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5356 r5361  
    264264
    265265void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
    266     //iBuilder->CallPrintInt(mKernelName + "_" + name + "_produced_count", value);
    267266    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
    268267}
     
    576575    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
    577576
     577    unsigned priorIdx = 0;
    578578    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    579         unsigned priorIdx = 0;
    580579        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
    581580        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5355 r5361  
    264264    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
    265265    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
    266     ConstantInt * const size = iBuilder->getSize(mBufferBlocks * mInitialCapacity);
    267     Value * const ptr = iBuilder->CreateAlignedMalloc(bufferType, size, iBuilder->getCacheAlignment());
    268     const auto alignment = bufferType->getPrimitiveSizeInBits() / 8;
    269     Constant * bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), size->getType(), false);
    270     iBuilder->CreateMemZero(ptr, iBuilder->CreateMul(size, bufferWidth), alignment);
     266    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
     267    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
     268    Value * const ptr = iBuilder->CreateAlignedMalloc(size, iBuilder->getCacheAlignment());
     269    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
    271270    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    272     iBuilder->CreateStore(ptr, streamSetPtr);
     271    iBuilder->CreateStore(iBuilder->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
    273272}
    274273
     
    303302    Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
    304303    Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
    305     Value * newCapacity = iBuilder->CreateMul(iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1)), iBuilder->getSize(2), "newCapacity");
     304
     305    Value * newCapacity = iBuilder->CreateAdd(streamIndex, iBuilder->getSize(1));
     306    newCapacity = iBuilder->CreateCeilLog2(newCapacity);
     307    newCapacity = iBuilder->CreateShl(iBuilder->getSize(1), newCapacity, "newCapacity");
    306308
    307309    std::string tmp;
     
    330332
    331333        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
    332         Value * newStreamSet = iBuilder->CreateAlignedMalloc(elementType, size, iBuilder->getCacheAlignment());
     334        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), iBuilder->getCacheAlignment()), elementType->getPointerTo());
    333335        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
    334336
  • icGREP/icgrep-devel/icgrep/pablo/carry_data.h

    r5354 r5361  
    3030        , BorrowedSummary
    3131        , ExplicitSummary
     32        , CountingSummary
    3233    };
    3334
    3435    CarryData()
    35     : mSummaryType(NoSummary)
    36     , mNonCarryCollapsingMode(false) {
     36    : mSummaryType(NoSummary) {
    3737
    3838    }
     
    5454    }
    5555
     56    bool hasCountingSummary() const {
     57        return (mSummaryType == CountingSummary);
     58    }
     59
    5660    void setSummaryType(const SummaryType value) {
    5761        mSummaryType = value;
    58     }
    59 
    60     bool nonCarryCollapsingMode() const {
    61         return mNonCarryCollapsingMode;
    62     }
    63 
    64     void setNonCarryCollapsingMode(const bool value = true) {
    65         mNonCarryCollapsingMode = value;
    6662    }
    6763   
     
    6965
    7066    SummaryType             mSummaryType;
    71     bool                    mNonCarryCollapsingMode;
    7267
    7368};
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5354 r5361  
    2121namespace pablo {
    2222
     23inline static unsigned floor_log2(const unsigned v) {
     24    assert ("log2(0) is undefined!" && v != 0);
     25    return 31 - __builtin_clz(v);
     26}
     27
    2328inline static unsigned nearest_pow2(const uint32_t v) {
    2429    assert(v > 0 && v < (UINT32_MAX / 2));
     
    97102    mCarryScopes = 0;
    98103    mCarryScopeIndex.push_back(0);
     104
     105
    99106    assert (mCarryFrame.empty());
    100     assert (mCarrySummary.empty());
     107
     108    assert (mCarryInSummary.empty());
     109    mCarryInSummary.push_back(Constant::getNullValue(mCarryPackType));
     110
     111    assert (mCarryOutSummary.empty());
     112    mCarryOutSummary.push_back(Constant::getNullValue(mCarryPackType));
    101113
    102114    if (mHasLoop) {
     
    118130    }
    119131    assert (mCarryFrame.empty());
    120     assert (mCarrySummary.empty());
     132
     133    assert (mCarryInSummary.size() == 1);
     134    mCarryInSummary.clear();
     135
     136    assert (mCarryOutSummary.size() == 1);
     137    mCarryOutSummary.clear();
     138
    121139    assert (mCarryScopeIndex.size() == 1);
    122140    mCarryScopeIndex.clear();
     
    140158
    141159    if (mCarryInfo->hasSummary()) {
    142         PHINode * carrySummary = iBuilder->CreatePHI(mCarryPackType, 2, "summary");
    143         assert (!mCarrySummary.empty());
    144         carrySummary->addIncoming(mCarrySummary.back(), entryBlock);
     160        PHINode * phiCarryOutSummary = iBuilder->CreatePHI(mCarryPackType, 2, "summary");
     161        assert (!mCarryOutSummary.empty());
     162        phiCarryOutSummary->addIncoming(mCarryOutSummary.back(), entryBlock);
    145163        // Replace the incoming carry summary with the phi node and add the phi node to the stack
    146164        // so that we can properly OR it into the outgoing summary value.
    147         mCarrySummary.back() = carrySummary;
    148         mCarrySummary.push_back(carrySummary);
    149     }
    150 
    151     if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
     165        mCarryOutSummary.back() = phiCarryOutSummary;
     166        Value * carryOut = phiCarryOutSummary;
     167        // In non-carry-collapsing mode, the carry out summary of this block iteration *MUST* match the carry in of the
     168        // subsequent block iteration. Otherwise the subsequent block iteration may become trapped in an infinite loop.
     169        // To ensure this, we effectively "zero-initialize" the carry-out coming into this loop but OR in carry-out
     170        // of the outer scope for the phi value at the end of the loop body. This avoids us needing to maintain a carry-in
     171        // summary for all outer scopes whenever only a nested scope requires this mode.
     172        if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
     173            carryOut = Constant::getNullValue(mCarryPackType);
     174        }
     175        mCarryOutSummary.push_back(carryOut);
     176    }
     177
     178    if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
     179
    152180        // Check whether we need to resize the carry state
    153181        PHINode * index = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    154182        mLoopIndicies.push_back(index);
    155183        index->addIncoming(iBuilder->getSize(0), entryBlock);
     184
     185        mCarryInSummary.push_back(Constant::getNullValue(mCarryPackType));
     186
    156187        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    157188        Value * capacity = iBuilder->CreateLoad(capacityPtr, false, "capacity");
     189        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
    158190        Value * arrayPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    159191        Value * array = iBuilder->CreateLoad(arrayPtr, false, "array");
    160192
     193        BasicBlock * const entry = iBuilder->GetInsertBlock();
     194        BasicBlock * const resizeCarryState = mKernel->CreateBasicBlock("ResizeCarryState");
     195        BasicBlock * const reallocExisting = mKernel->CreateBasicBlock("ReallocExisting");
     196        BasicBlock * const createNew = mKernel->CreateBasicBlock("CreateNew");
     197        BasicBlock * const resumeKernel = mKernel->CreateBasicBlock("ResumeKernel");
     198
     199        iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(index, capacity), resumeKernel, resizeCarryState);
     200
     201        // RESIZE CARRY BLOCK
     202        iBuilder->SetInsertPoint(resizeCarryState);
     203        const auto BlockWidth = mCarryPackType->getPrimitiveSizeInBits() / 8;
     204        const auto Log2BlockWidth = floor_log2(BlockWidth);
     205        Constant * const carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(array->getType()->getPointerElementType()), iBuilder->getSizeTy(), false);
     206        Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
     207
     208        Value * const hasCarryState = iBuilder->CreateICmpNE(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
     209
     210        iBuilder->CreateLikelyCondBr(hasCarryState, reallocExisting, createNew);
     211
     212        // REALLOCATE EXISTING
     213        iBuilder->SetInsertPoint(reallocExisting);
     214
     215        Value * const capacitySize = iBuilder->CreateMul(capacity, carryStateWidth);
     216        Value * const newCapacitySize = iBuilder->CreateShl(capacitySize, 1); // x 2
     217
     218
     219        Value * newArray = iBuilder->CreateAlignedMalloc(newCapacitySize, iBuilder->getCacheAlignment());
     220        iBuilder->CreateMemCpy(newArray, array, capacitySize, BlockWidth);
     221        iBuilder->CreateMemZero(iBuilder->CreateGEP(newArray, capacitySize), capacitySize, BlockWidth);
     222        iBuilder->CreateAlignedFree(array);
     223        newArray = iBuilder->CreatePointerCast(newArray, array->getType());
     224        iBuilder->CreateStore(newArray, arrayPtr);
     225
     226        Value * const log2capacity = iBuilder->CreateAdd(iBuilder->CreateCeilLog2(capacity), ONE);
     227        Value * const summarySize = iBuilder->CreateShl(log2capacity, Log2BlockWidth + 1); // x 2(BlockWidth)
     228        Value * const newLog2Capacity = iBuilder->CreateAdd(log2capacity, ONE);
     229        Value * const newSummarySize = iBuilder->CreateShl(newLog2Capacity, Log2BlockWidth + 1); // x 2(BlockWidth)
     230
     231        Value * const summary = iBuilder->CreateLoad(summaryPtr, false);
     232        Value * newSummary = iBuilder->CreateAlignedMalloc(newSummarySize, BlockWidth);
     233        iBuilder->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
     234        iBuilder->CreateMemZero(iBuilder->CreateGEP(newSummary, summarySize), iBuilder->getSize(2 * BlockWidth), BlockWidth);
     235        iBuilder->CreateAlignedFree(summary);
     236
     237        Value * ptr1 = iBuilder->CreateGEP(newSummary, summarySize);
     238        ptr1 = iBuilder->CreatePointerCast(ptr1, mCarryPackType->getPointerTo());
     239
     240        Value * ptr2 = iBuilder->CreateGEP(newSummary, iBuilder->CreateAdd(summarySize, iBuilder->getSize(BlockWidth)));
     241        ptr2 = iBuilder->CreatePointerCast(ptr2, mCarryPackType->getPointerTo());
     242
     243        newSummary = iBuilder->CreatePointerCast(newSummary, mCarryPackType->getPointerTo());
     244        iBuilder->CreateStore(newSummary, summaryPtr);
     245        Value * const newCapacity = iBuilder->CreateShl(ONE, log2capacity);
     246
     247        iBuilder->CreateStore(newCapacity, capacityPtr);
     248
     249        iBuilder->CreateBr(resumeKernel);
     250
     251        // CREATE NEW
     252        iBuilder->SetInsertPoint(createNew);
     253
     254        Constant * const initialLog2Capacity = iBuilder->getInt64(4);
     255        Constant * const initialCapacity = ConstantExpr::getShl(ONE, initialLog2Capacity);
     256        Constant * const initialCapacitySize = ConstantExpr::getMul(initialCapacity, carryStateWidth);
     257
     258        Value * initialArray = iBuilder->CreateAlignedMalloc(initialCapacitySize, iBuilder->getCacheAlignment());
     259        iBuilder->CreateMemZero(initialArray, initialCapacitySize, BlockWidth);
     260        initialArray = iBuilder->CreatePointerCast(initialArray, array->getType());
     261        iBuilder->CreateStore(initialArray, arrayPtr);
     262
     263        Constant * initialSummarySize = ConstantExpr::getShl(ConstantExpr::getAdd(initialLog2Capacity, iBuilder->getInt64(1)), iBuilder->getInt64(Log2BlockWidth + 1));
     264        Value * initialSummary = iBuilder->CreateAlignedMalloc(initialSummarySize, BlockWidth);
     265        iBuilder->CreateMemZero(initialSummary, initialSummarySize, BlockWidth);
     266        initialSummary = iBuilder->CreatePointerCast(initialSummary, mCarryPackType->getPointerTo());
     267        iBuilder->CreateStore(initialSummary, summaryPtr);
     268
     269        iBuilder->CreateStore(initialCapacity, capacityPtr);
     270
     271        iBuilder->CreateBr(resumeKernel);
     272
     273        // RESUME KERNEL
     274        iBuilder->SetInsertPoint(resumeKernel);
     275        // Load the appropriate carry state block
     276        PHINode * phiArrayPtr = iBuilder->CreatePHI(array->getType(), 3);
     277        phiArrayPtr->addIncoming(array, entry);
     278        phiArrayPtr->addIncoming(initialArray, createNew);
     279        phiArrayPtr->addIncoming(newArray, reallocExisting);
     280
     281        // note: the 3 here is only to pass the assertion later. It refers to the number of elements in the carry data struct.
     282        mCarryFrame.emplace_back(mCurrentFrame, 3);
     283        mCurrentFrame = iBuilder->CreateGEP(phiArrayPtr, index);
     284    }
     285}
     286
     287/** ------------------------------------------------------------------------------------------------------------- *
     288 * @brief leaveLoopBody
     289 ** ------------------------------------------------------------------------------------------------------------- */
     290void CarryManager::leaveLoopBody(BasicBlock * /* exitBlock */) {
     291
     292    if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
     293
     294        std::tie(mCurrentFrame, mCurrentFrameIndex) = mCarryFrame.back();
     295        mCarryFrame.pop_back();
     296        assert (!mCarryInSummary.empty());
     297        Value * carryInAccumulator = mCarryInSummary.back();
     298        Value * carryOutAccumulator = mCarryOutSummary.back();
     299
     300        // In non-carry-collapsing mode, we cannot rely on the fact that performing a single iteration of this
     301        // loop will consume all of the incoming carries from the prior block. We need to subtract the carries
     302        // consumed by this iteration from our carry summary state. To do so in parallel, we use the half-
     303        // subtractor circuit to do it in ceil log2 steps. Similarly, we compute our carry out summary state
     304        // (for the subsequent block to subtract) using a half-adder circuit.
     305
     306        // NOTE: this requires that, for all loop iterations, i, and all block iterations, j, the carry in
     307        // summary, CI_(i,j), matches the carry out summary of the prior block iteration, CO_(i,j-1).
     308        // Otherwise we may end up with an incorrect result or being trapped in an infinite loop.
     309
     310        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     311        Value * capacity = iBuilder->CreateLoad(capacityPtr, false);
     312        Value * summaryPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
     313        Value * summary = iBuilder->CreateLoad(summaryPtr, false);
     314
     315        Constant * const ONE = ConstantInt::get(capacity->getType(), 1);
     316
     317        Value * loopSelector = iBuilder->CreateZExt(mLoopSelector, capacity->getType());
     318
    161319        BasicBlock * entry = iBuilder->GetInsertBlock();
    162         BasicBlock * resizeBlock = mKernel->CreateBasicBlock("");
    163         BasicBlock * cleanUpBlock = mKernel->CreateBasicBlock("");
    164         BasicBlock * zeroBlock = mKernel->CreateBasicBlock("");
    165         BasicBlock * codeBlock = mKernel->CreateBasicBlock("");
    166 
    167         Value * cond = iBuilder->CreateICmpULT(index, capacity);
    168         iBuilder->CreateCondBr(cond, codeBlock, resizeBlock);
    169         iBuilder->SetInsertPoint(resizeBlock);
    170 
    171         Type * const carryStateType = array->getType()->getPointerElementType();
    172 
    173         Constant * carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(carryStateType), capacity->getType(), false);
    174         Value * newCapacity = iBuilder->CreateSelect(iBuilder->CreateICmpNE(index, ConstantInt::getNullValue(index->getType())), iBuilder->CreateMul(index, iBuilder->getSize(2)), iBuilder->getSize(4));
    175         Value * newArray = iBuilder->CreateAlignedMalloc(carryStateType, newCapacity, iBuilder->getCacheAlignment());
    176 
    177         assert (newCapacity->getType() == capacity->getType());
    178         assert (newArray->getType() == array->getType());
    179 
    180         Value * isNullCarryState = iBuilder->CreateICmpEQ(array, ConstantPointerNull::get(cast<PointerType>(array->getType())));
    181 
    182         iBuilder->CreateCondBr(isNullCarryState, zeroBlock, cleanUpBlock);
    183         iBuilder->SetInsertPoint(cleanUpBlock);
    184 
    185         iBuilder->CreateMemCpy(newArray, array, iBuilder->CreateMul(capacity, carryStateWidth), iBuilder->getCacheAlignment());
    186         iBuilder->CreateAlignedFree(array);
    187         iBuilder->CreateBr(zeroBlock);
    188 
    189         iBuilder->SetInsertPoint(zeroBlock);
    190 
    191         iBuilder->CreateMemZero(iBuilder->CreateGEP(newArray, capacity), iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), carryStateWidth), iBuilder->getCacheAlignment());
    192         iBuilder->CreateStore(newCapacity, capacityPtr);
    193         iBuilder->CreateStore(newArray, arrayPtr);
    194 
    195         iBuilder->CreateBr(codeBlock);
    196 
    197         // Load the appropriate carry stat block
    198         iBuilder->SetInsertPoint(codeBlock);
    199 
    200         PHINode * phiArray = iBuilder->CreatePHI(array->getType(), 2);
    201         phiArray->addIncoming(array, entry);
    202         phiArray->addIncoming(newArray, zeroBlock);
    203 
    204         mCurrentFrame = iBuilder->CreateGEP(phiArray, index);
    205 
    206     }
    207 }
    208 
    209 /** ------------------------------------------------------------------------------------------------------------- *
    210  * @brief leaveLoopBody
    211  ** ------------------------------------------------------------------------------------------------------------- */
    212 void CarryManager::leaveLoopBody(BasicBlock * const exitBlock) {
    213     if (mCarryInfo->hasSummary()) {
    214         const auto n = mCarrySummary.size(); assert (n > 1);
    215         cast<PHINode>(mCarrySummary[n - 2])->addIncoming(mCarrySummary[n - 1], exitBlock);
    216         mCarrySummary.pop_back();
    217     }
    218     if (LLVM_UNLIKELY(mCarryInfo->nonCarryCollapsingMode())) {
     320        BasicBlock * update = mKernel->CreateBasicBlock("UpdateNonCarryCollapsingSummary");
     321        BasicBlock * resume = mKernel->CreateBasicBlock("ResumeAfterUpdatingNonCarryCollapsingSummary");
     322
     323        iBuilder->CreateBr(update);
     324
     325        iBuilder->SetInsertPoint(update);
     326        PHINode * i = iBuilder->CreatePHI(capacity->getType(), 2);
     327        i->addIncoming(ConstantInt::getNullValue(capacity->getType()), entry);
     328        PHINode * const borrow = iBuilder->CreatePHI(carryInAccumulator->getType(), 2);
     329        borrow->addIncoming(carryInAccumulator, entry);
     330        PHINode * const carry = iBuilder->CreatePHI(carryOutAccumulator->getType(), 2);
     331        carry->addIncoming(carryOutAccumulator, entry);
     332        // OR the updated carry in summary later for the summaryTest
     333        PHINode * const carryInSummary = iBuilder->CreatePHI(mCarryPackType, 2);
     334        carryInSummary->addIncoming(Constant::getNullValue(mCarryPackType), entry);
     335
     336        // half subtractor
     337        Value * const carryInOffset = iBuilder->CreateOr(iBuilder->CreateShl(i, 1), loopSelector);
     338        Value * const carryInPtr = iBuilder->CreateGEP(summary, carryInOffset);
     339        Value * const carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
     340        Value * const carryInPrime = iBuilder->CreateXor(carryIn, borrow);
     341        Value * const finalCarryInSummary = iBuilder->CreateOr(carryInSummary, carryInPrime);
     342        iBuilder->CreateBlockAlignedStore(carryInPrime, carryInPtr);
     343        carryInSummary->addIncoming(finalCarryInSummary, update);
     344        Value * finalBorrow = iBuilder->CreateAnd(iBuilder->CreateNot(carryIn), borrow);
     345        borrow->addIncoming(finalBorrow, update);
     346
     347        // half adder
     348        Value * const carryOutOffset = iBuilder->CreateXor(carryInOffset, ConstantInt::get(carryInOffset->getType(), 1));
     349        Value * const carryOutPtr = iBuilder->CreateGEP(summary, carryOutOffset);
     350        Value * const carryOut = iBuilder->CreateBlockAlignedLoad(carryOutPtr);
     351        Value * const carryOutPrime = iBuilder->CreateXor(carryOut, carry);
     352        iBuilder->CreateBlockAlignedStore(carryOutPrime, carryOutPtr);
     353        Value * finalCarry = iBuilder->CreateAnd(carryOut, carry);
     354        carry->addIncoming(finalCarry, update);
     355
     356        // loop condition
     357        Value * n = iBuilder->CreateAdd(i, ONE);
     358        i->addIncoming(n, update);
     359        iBuilder->CreateCondBr(iBuilder->CreateICmpNE(iBuilder->CreateShl(ONE, i), capacity), update, resume);
     360
     361        iBuilder->SetInsertPoint(resume);
     362
     363        IntegerType * ty = IntegerType::get(iBuilder->getContext(), iBuilder->getBitBlockWidth());
     364        iBuilder->CreateAssert(iBuilder->CreateICmpEQ(iBuilder->CreateBitCast(finalBorrow, ty), Constant::getNullValue(ty)), "borrow != 0");
     365        iBuilder->CreateAssert(iBuilder->CreateICmpEQ(iBuilder->CreateBitCast(finalCarry, ty), Constant::getNullValue(ty)), "carry != 0");
     366
    219367        assert (!mLoopIndicies.empty());
    220368        PHINode * index = mLoopIndicies.back();
    221         index->addIncoming(iBuilder->CreateAdd(index, iBuilder->getSize(1)), exitBlock);
     369        index->addIncoming(iBuilder->CreateAdd(index, iBuilder->getSize(1)), resume);
    222370        mLoopIndicies.pop_back();
     371        mCarryInSummary.back() = finalCarryInSummary;
     372    }
     373    if (mCarryInfo->hasSummary()) {
     374        const auto n = mCarryOutSummary.size(); assert (n > 1);
     375        Value * carryOut = mCarryOutSummary.back();
     376        mCarryOutSummary.pop_back();
     377        PHINode * phiCarryOut = cast<PHINode>(mCarryOutSummary.back());
     378        if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
     379            carryOut = iBuilder->CreateOr(phiCarryOut, carryOut);
     380        }
     381        phiCarryOut->addIncoming(carryOut, iBuilder->GetInsertBlock());
    223382    }
    224383}
     
    227386 * @brief leaveLoopScope
    228387 ** ------------------------------------------------------------------------------------------------------------- */
    229 void CarryManager::leaveLoopScope(BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
     388void CarryManager::leaveLoopScope(BasicBlock * const /* entryBlock */, BasicBlock * const /* exitBlock */) {
    230389    assert (mLoopDepth > 0);
    231390    --mLoopDepth;
     
    239398    ++mIfDepth;
    240399    enterScope(scope);
    241     mCarrySummary.push_back(Constant::getNullValue(mCarryPackType));
     400    mCarryOutSummary.push_back(Constant::getNullValue(mCarryPackType));
    242401}
    243402
     
    247406Value * CarryManager::generateSummaryTest(Value * condition) {
    248407    if (LLVM_LIKELY(mCarryInfo->hasSummary())) {
    249         // enter the (potentially nested) struct and extract the summary element (always element 0)
    250         unsigned count = 2;
    251         if (LLVM_UNLIKELY(mCarryInfo->hasBorrowedSummary())) {
    252             Type * frameTy = mCurrentFrame->getType()->getPointerElementType();
    253             count = 1;
    254             while (frameTy->isStructTy()) {
    255                 ++count;
    256                 frameTy = frameTy->getStructElementType(0);
     408        Value * summary = nullptr;
     409        if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
     410            summary = mCarryInSummary.back();
     411            mCarryInSummary.pop_back();
     412        } else {
     413            // enter the (potentially nested) struct and extract the summary element (always element 0)
     414            unsigned count = 2;
     415            if (LLVM_UNLIKELY(mCarryInfo->hasBorrowedSummary())) {
     416                Type * frameTy = mCurrentFrame->getType()->getPointerElementType();
     417                count = 1;
     418                while (frameTy->isStructTy()) {
     419                    ++count;
     420                    frameTy = frameTy->getStructElementType(0);
     421                }
    257422            }
    258         }
    259         const bool useLoopSelector = mCarryInfo->hasImplicitSummary() && mLoopDepth > 0;
    260         const auto length = count + (useLoopSelector ? 1 : 0);
    261         Value * indicies[length];
    262         std::fill(indicies, indicies + count, iBuilder->getInt32(0));
    263         if (LLVM_UNLIKELY(useLoopSelector)) {
    264             indicies[count] = mLoopSelector;
    265         }
    266         ArrayRef<Value *> ar(indicies, length);
    267         Value * ptr = iBuilder->CreateGEP(mCurrentFrame, ar);
    268         // Sanity check: make sure we're accessing a summary value
    269         assert (ptr->getType()->getPointerElementType()->canLosslesslyBitCastTo(condition->getType()));
    270         Value * summary = iBuilder->CreateBlockAlignedLoad(ptr);
     423            const bool useLoopSelector = mCarryInfo->hasImplicitSummary() && mLoopDepth > 0;
     424            const auto length = count + (useLoopSelector ? 1 : 0);
     425            Value * indicies[length];
     426            std::fill(indicies, indicies + count, iBuilder->getInt32(0));
     427            if (LLVM_UNLIKELY(useLoopSelector)) {
     428                indicies[count] = mLoopSelector;
     429            }
     430            ArrayRef<Value *> ar(indicies, length);
     431            Value * ptr = iBuilder->CreateGEP(mCurrentFrame, ar);
     432            // Sanity check: make sure we're accessing a summary value
     433            assert (ptr->getType()->getPointerElementType()->canLosslesslyBitCastTo(condition->getType()));
     434            summary = iBuilder->CreateBlockAlignedLoad(ptr);
     435        }
    271436        condition = iBuilder->simd_or(condition, summary);
    272437    }
     
    285450 ** ------------------------------------------------------------------------------------------------------------- */
    286451void CarryManager::leaveIfBody(BasicBlock * const exitBlock) { assert (exitBlock);
    287     const auto n = mCarrySummary.size();
     452    const auto n = mCarryOutSummary.size();
    288453    if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {
    289         assert (!mCarrySummary.empty());
     454        assert (!mCarryOutSummary.empty());
    290455        Value * ptr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    291         Value * const value = iBuilder->CreateBitCast(mCarrySummary.back(), mBitBlockType);
     456        Value * const value = iBuilder->CreateBitCast(mCarryOutSummary.back(), mBitBlockType);
    292457        iBuilder->CreateBlockAlignedStore(value, ptr);
    293458    }
    294459    if (n > 1) {
    295         mCarrySummary[n - 1] = iBuilder->CreateOr(mCarrySummary[n - 1], mCarrySummary[n - 2], "summary");
     460        mCarryOutSummary[n - 1] = iBuilder->CreateOr(mCarryOutSummary[n - 1], mCarryOutSummary[n - 2], "summary");
    296461    }
    297462}
     
    303468    assert (mIfDepth > 0);
    304469    if (mCarryInfo->hasSummary()) {
    305         const auto n = mCarrySummary.size(); assert (n > 0);
     470        const auto n = mCarryOutSummary.size(); assert (n > 0);
    306471        if (n > 1) {
    307472            // When leaving a nested If scope with a summary value, phi out the summary to ensure the
    308473            // appropriate summary is stored in the outer scope.
    309             Value * nested = mCarrySummary[n - 1];
    310             Value * outer = mCarrySummary[n - 2];
     474            Value * nested = mCarryOutSummary[n - 1];
     475            Value * outer = mCarryOutSummary[n - 2];
    311476            if (LLVM_LIKELY(nested != outer)) {
    312477                assert (nested->getType() == outer->getType());
     
    314479                phi->addIncoming(outer, entryBlock);
    315480                phi->addIncoming(nested, exitBlock);
    316                 mCarrySummary[n - 2] = phi;
     481                mCarryOutSummary[n - 2] = phi;
    317482            }
    318483        }       
     
    320485    --mIfDepth;
    321486    leaveScope();
    322     mCarrySummary.pop_back();
     487    mCarryOutSummary.pop_back();
    323488}
    324489
     
    447612    Value * carryIn = iBuilder->CreateBlockAlignedLoad(iBuilder->CreateGEP(buffer, loadIndex0));
    448613    assert (carryIn->getType() == mBitBlockType);
     614    // in non-carry collapsing mode, we need to accumulate the carry in value in order to properly subtract it from the
     615    // carry in state in order to deduce whether we still have pending iterations even if the loop condition fails.
     616    if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
     617        mCarryInSummary.back() = iBuilder->CreateOr(mCarryInSummary.back(), carryIn);
     618    }
    449619    // If the long advance is an exact multiple of mBitBlockWidth, we simply return the oldest
    450620    // block in the long advance carry data area. 
     
    472642    if (mLoopDepth > 0) {
    473643        carryInPtr = iBuilder->CreateGEP(carryInPtr, {iBuilder->getInt32(0), mLoopSelector});       
    474     }
     644    }   
    475645    assert (carryInPtr->getType()->getPointerElementType() == mCarryPackType);
    476646    Value * const carryIn = iBuilder->CreateBlockAlignedLoad(carryInPtr);
     647    // in non-carry collapsing mode, we need to accumulate the carry in value in order to properly subtract it from the
     648    // carry in state in order to deduce whether we still have pending iterations even if the loop condition fails.
     649    if (LLVM_UNLIKELY(mCarryInfo->hasCountingSummary())) {
     650        mCarryInSummary.back() = iBuilder->CreateOr(mCarryInSummary.back(), carryIn);
     651    }
    477652    if (mLoopDepth > 0) {
    478653        iBuilder->CreateBlockAlignedStore(Constant::getNullValue(mCarryPackType), carryInPtr);
     
    485660 ** ------------------------------------------------------------------------------------------------------------- */
    486661void CarryManager::setNextCarryOut(Value * carryOut) {
    487     if (LLVM_LIKELY(mCarryInfo->hasExplicitSummary())) {       
     662    if (mCarryInfo->hasExplicitSummary() || mCarryInfo->hasCountingSummary()) {
    488663        addToSummary(carryOut);
    489664    }
     
    506681 ** ------------------------------------------------------------------------------------------------------------- */
    507682void CarryManager::addToSummary(Value * value) { assert (value);
    508     assert (mIfDepth > 0 && !mCarrySummary.empty());
    509     Value * const summary = mCarrySummary.back(); assert (summary);
     683    assert (!mCarryOutSummary.empty());
     684    Value * const summary = mCarryOutSummary.back(); assert (summary);
    510685    if (LLVM_UNLIKELY(summary == value)) {
    511686        return;  //Nothing to add.
     
    516691            return;
    517692        } else if (LLVM_UNLIKELY(cast<Constant>(value)->isAllOnesValue())) {
    518             mCarrySummary.back() = value;
     693            mCarryOutSummary.back() = value;
    519694            return;
    520695        }
    521696    } else if (LLVM_UNLIKELY(isa<Constant>(summary))) {
    522697        if (LLVM_UNLIKELY(cast<Constant>(summary)->isZeroValue())) {
    523             mCarrySummary.back() = value;
     698            mCarryOutSummary.back() = value;
    524699            return;
    525700        } else if (LLVM_UNLIKELY(cast<Constant>(summary)->isAllOnesValue())) {
     
    527702        }
    528703    }   
    529     mCarrySummary.back() = iBuilder->CreateOr(summary, value);
     704    mCarryOutSummary.back() = iBuilder->CreateOr(summary, value);
    530705}
    531706
     
    534709 ** ------------------------------------------------------------------------------------------------------------- */
    535710bool CarryManager::inCollapsingCarryMode() const {
    536     return (mCurrentScope->getBranch() && isa<While>(mCurrentScope->getBranch()) && !mCarryInfo->nonCarryCollapsingMode());
     711    return (mCurrentScope->getBranch() && isa<While>(mCurrentScope->getBranch()) && !mCarryInfo->hasCountingSummary());
    537712}
    538713
     
    621796
    622797    StructType * carryState = nullptr;
    623 
    624798    // Add the summary pack if needed.
    625799    CarryData::SummaryType summaryType = CarryData::NoSummary;
     
    627801        carryState = StructType::get(iBuilder->getContext());
    628802    } else {
    629         cd.setNonCarryCollapsingMode(loopDepth > 0 && hasIterationSpecificAssignment(scope));
    630         if (ifDepth > 0) {
    631             // A non-collapsing loop requires a unique summary for each iteration. Thus whenever
    632             // we have a non-collapsing While within an If scope with an implicit summary, the If
    633             // scope requires an explicit summary.
    634             if (cd.nonCarryCollapsingMode() || isa<If>(scope->getBranch())) {
    635                 if (LLVM_LIKELY(state.size() > 1 || hasLongAdvances)) {
     803        const bool nonCarryCollapsingMode = loopDepth > 0 && hasIterationSpecificAssignment(scope);
     804        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
     805            summaryType = CarryData::CountingSummary;
     806        } else if (ifDepth > 0) {
     807            // A non-collapsing loop requires a unique summary for each iteration. Thus whenever we have a non-collapsing While
     808            // within an If scope with an implicit summary, the If scope requires an explicit summary.
     809            if (isa<If>(scope->getBranch())) {
     810                if (LLVM_LIKELY(hasLongAdvances || state.size() > 1)) {
    636811                    summaryType = CarryData::ExplicitSummary;
    637812                    state.insert(state.begin(), mCarryPackType);
     
    645820        }
    646821        carryState = StructType::get(iBuilder->getContext(), state);
    647         // If we in a loop and cannot use collapsing carry mode, convert the struct into a capacity and pointer pair.
    648         if (LLVM_UNLIKELY(cd.nonCarryCollapsingMode())) {
    649             mHasVariableLengthCarryData = true;
    650             carryState = StructType::get(iBuilder->getSizeTy(), carryState->getPointerTo(), nullptr);
     822        // If we're in a loop and cannot use collapsing carry mode, convert the carry state struct into a capacity,
     823        // carry state pointer, and summary pointer struct.
     824        if (LLVM_UNLIKELY(nonCarryCollapsingMode)) {
     825            carryState = StructType::get(iBuilder->getSizeTy(), carryState->getPointerTo(), mCarryPackType->getPointerTo(), nullptr);
    651826        }
    652827    }
     
    673848, mIfDepth(0)
    674849, mHasLongAdvance(false)
    675 , mHasVariableLengthCarryData(false)
    676850, mHasLoop(false)
    677851, mLoopDepth(0)
     
    682856
    683857}
    684 
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.h

    r5353 r5361  
    123123    bool                                            mHasLongAdvance;
    124124
    125     bool                                            mHasVariableLengthCarryData;
    126 
    127125    bool                                            mHasLoop;
    128126    unsigned                                        mLoopDepth;
     
    137135    std::vector<unsigned>                           mCarryScopeIndex;
    138136
    139     std::vector<llvm::Value *>                      mCarrySummary;
     137    std::vector<llvm::Value *>                      mCarryInSummary;
     138    std::vector<llvm::Value *>                      mCarryOutSummary;
    140139};
    141140
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r5353 r5361  
    153153    compileBlock(ifBody);
    154154
    155     BasicBlock * ifExitBlock = iBuilder->GetInsertBlock();   
    156 
    157     mCarryManager->leaveIfBody(ifExitBlock);
     155    mCarryManager->leaveIfBody(iBuilder->GetInsertBlock());
     156
     157    BasicBlock * ifExitBlock = iBuilder->GetInsertBlock();
    158158
    159159    iBuilder->CreateBr(ifEndBlock);
     
    282282    mCarryManager->enterLoopBody(whileEntryBlock);
    283283
    284     //
    285     // Now compile the loop body proper.  Carry-out accumulated values
    286     // and iterated values of Next nodes will be computed.
    287284    compileBlock(whileBody);
    288285
    289286    // After the whileBody has been compiled, we may be in a different basic block.
    290     BasicBlock * const whileExitBlock = iBuilder->GetInsertBlock();
    291 
    292     // Terminate the while loop body with a conditional branch back.
    293     Value * condition = compileExpression(whileStatement->getCondition());
    294     if (condition->getType() == iBuilder->getBitBlockType()) {
    295         condition = iBuilder->bitblock_any(mCarryManager->generateSummaryTest(condition));
    296     }
    297 
    298     mCarryManager->leaveLoopBody(whileExitBlock);
     287
     288    mCarryManager->leaveLoopBody(iBuilder->GetInsertBlock());
    299289
    300290
     
    306296    }
    307297#endif
     298
     299    BasicBlock * const whileExitBlock = iBuilder->GetInsertBlock();
    308300
    309301    // and for any variant nodes in the loop body
     
    341333
    342334    BasicBlock * whileEndBlock = mKernel->CreateBasicBlock("while.end");
     335
     336    // Terminate the while loop body with a conditional branch back.
     337    Value * condition = compileExpression(whileStatement->getCondition());
     338    if (condition->getType() == iBuilder->getBitBlockType()) {
     339        condition = iBuilder->bitblock_any(mCarryManager->generateSummaryTest(condition));
     340    }
    343341
    344342    iBuilder->CreateCondBr(condition, whileBodyBlock, whileEndBlock);
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r5353 r5361  
    174174    PM.add(createGVNPass());                     //Eliminate common subexpressions.
    175175    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    176     PM.add(createCFGSimplificationPass());
     176    PM.add(createCFGSimplificationPass());   
    177177    PM.run(*m);
    178178
     
    217217#if LLVM_VERSION_MINOR > 6
    218218    if (codegen::DebugOptionIsSet(codegen::ShowASM)) {
    219       WriteAssembly(builder.selectTarget(), m);
     219        WriteAssembly(builder.selectTarget(), m);
    220220    }
    221221#endif
     
    223223    if (engine == nullptr) {
    224224        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
    225     }   
     225    }
    226226    return engine;
    227227}
     
    236236            cache = new ICGrepObjectCache(codegen::ObjectCacheDir);
    237237        e->setObjectCache(cache);
    238     }   
    239 }
    240 
    241 
     238    }
     239}
     240
     241
Note: See TracChangeset for help on using the changeset viewer.