Ignore:
Timestamp:
May 12, 2017, 4:54:11 PM (2 years ago)
Author:
nmedfort
Message:

Refactoring work + correction for getRawItemPointer

Location:
icGREP/icgrep-devel/icgrep
Files:
18 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r5436 r5446  
    118118    }
    119119
    120     llvm::VectorType * getStreamTy(const unsigned FieldWidth = 1) {
    121         return llvm::VectorType::get(llvm::IntegerType::getIntNTy(getContext(), FieldWidth), 0);
     120    static llvm::VectorType * getStreamTy(llvm::LLVMContext & C, const unsigned FieldWidth = 1) {
     121        return llvm::VectorType::get(llvm::IntegerType::getIntNTy(C, FieldWidth), 0);
    122122    }
    123123
    124     inline llvm::ArrayType * getStreamSetTy(const unsigned NumElements = 1, const unsigned FieldWidth = 1) {
    125         return llvm::ArrayType::get(getStreamTy(FieldWidth), NumElements);
     124    static llvm::ArrayType * getStreamSetTy(llvm::LLVMContext & C, const unsigned NumElements = 1, const unsigned FieldWidth = 1) {
     125        return llvm::ArrayType::get(getStreamTy(C, FieldWidth), NumElements);
     126    }
     127
     128    llvm::VectorType * getStreamTy(const unsigned FieldWidth = 1) {
     129        return getStreamTy(getContext(), FieldWidth);
     130    }
     131
     132    llvm::ArrayType * getStreamSetTy(const unsigned NumElements = 1, const unsigned FieldWidth = 1) {
     133        return getStreamSetTy(getContext(), NumElements, FieldWidth);
    126134    }
    127135
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_target.cpp

    r5436 r5446  
    1919namespace IDISA {
    2020   
    21 KernelBuilder * GetIDISA_Builder(llvm::Module * const module) {
     21KernelBuilder * GetIDISA_Builder(llvm::LLVMContext & C, const std::string & targetTriple) {
    2222    unsigned registerWidth = 0;
    23     Triple T(module->getTargetTriple());
     23    Triple T(targetTriple);
    2424    if (T.isArch64Bit()) {
    2525        registerWidth = 64;
     
    3535    if (codegen::BlockSize >= 256) {
    3636        if (hasAVX2) {
    37             return new KernelBuilderImpl<IDISA_AVX2_Builder>(module->getContext(), registerWidth, codegen::BlockSize, codegen::BlockSize);
     37            return new KernelBuilderImpl<IDISA_AVX2_Builder>(C, registerWidth, codegen::BlockSize, codegen::BlockSize);
    3838        }
    3939    } else if (codegen::BlockSize == 64) {
    40         return new KernelBuilderImpl<IDISA_I64_Builder>(module->getContext(), registerWidth, codegen::BlockSize, codegen::BlockSize);
     40        return new KernelBuilderImpl<IDISA_I64_Builder>(C, registerWidth, codegen::BlockSize, codegen::BlockSize);
    4141    }
    42     return new KernelBuilderImpl<IDISA_SSE2_Builder>(module->getContext(), registerWidth, codegen::BlockSize, codegen::BlockSize);
     42    return new KernelBuilderImpl<IDISA_SSE2_Builder>(C, registerWidth, codegen::BlockSize, codegen::BlockSize);
    4343}
    4444
    45 KernelBuilder * GetIDISA_GPU_Builder(llvm::Module * const module) {
    46     return new KernelBuilderImpl<IDISA_NVPTX20_Builder>(module->getContext(), 64, 64, 64);
     45KernelBuilder * GetIDISA_GPU_Builder(llvm::LLVMContext & C) {
     46    return new KernelBuilderImpl<IDISA_NVPTX20_Builder>(C, 64, 64, 64);
    4747}
    4848
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_target.h

    r5435 r5446  
    77#define IDISA_TARGET_H
    88
     9namespace llvm { class LLVMContext; }
     10namespace kernel { class KernelBuilder; }
    911#include <string>
    10 namespace llvm { class Module; }
    11 namespace kernel { class KernelBuilder; }
    1212
    1313namespace IDISA {
    1414   
    15 kernel::KernelBuilder * GetIDISA_Builder(llvm::Module * const module);
     15kernel::KernelBuilder * GetIDISA_Builder(llvm::LLVMContext & C, const std::string & targetTriple);
    1616
    17 kernel::KernelBuilder * GetIDISA_GPU_Builder(llvm::Module * const module);
     17kernel::KernelBuilder * GetIDISA_GPU_Builder(llvm::LLVMContext & C);
    1818
    1919}
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5437 r5446  
    392392    LLVMContext TheContext;
    393393    Module * M = new Module("editd-gpu", TheContext);
    394     IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_GPU_Builder(M);
     394    IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_GPU_Builder(M->getContext());
    395395    M->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
    396396    M->setTargetTriple("nvptx64-nvidia-cuda");
     
    472472        LLVMContext TheContext;
    473473    Module * M = new Module("editd-gpu", TheContext);
    474     IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_GPU_Builder(M);
     474    IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_GPU_Builder(M->getContext());
    475475    M->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
    476476    M->setTargetTriple("nvptx64-nvidia-cuda");
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5440 r5446  
    3737, mRE(re)
    3838, mSignature(Printer_RE::PrintRE(re)) {
    39     setName(sha1sum(mSignature));
     39    setName("ic" + sha1sum(mSignature));
    4040}
    4141
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5443 r5446  
    6565    }
    6666}
    67    
    68 void Kernel::createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    69     assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
    70     assert ("IDISA Builder does not have a valid Module" && idb->getModule());
    71     std::stringstream cacheName;   
    72     cacheName << getName() << '_' << idb->getBuilderUniqueName();
    73     for (const StreamSetBuffer * b: inputs) {
    74         cacheName <<  ':' <<  b->getUniqueID();
    75     }
    76     for (const StreamSetBuffer * b: outputs) {
    77         cacheName <<  ':' <<  b->getUniqueID();
    78     }
    79     Module * const kernelModule = new Module(cacheName.str(), idb->getContext());
    80     createKernelStub(idb, inputs, outputs, kernelModule);
    81 }
    82 
    83 void Kernel::createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, Module * const kernelModule) {
     67
     68void Kernel::bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    8469    assert (mModule == nullptr);
    85     assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
    8670    assert (mStreamSetInputBuffers.empty());
    8771    assert (mStreamSetOutputBuffers.empty());
     
    121105    }
    122106
    123     mModule = kernelModule;
    124107    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
    125108    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
     109}
     110
     111Module * Kernel::makeModule(const std::unique_ptr<KernelBuilder> & idb) {
     112    assert (mModule == nullptr);
     113    std::stringstream cacheName;   
     114    cacheName << getName() << '_' << idb->getBuilderUniqueName();
     115    for (const StreamSetBuffer * b: mStreamSetInputBuffers) {
     116        cacheName <<  ':' <<  b->getUniqueID();
     117    }
     118    for (const StreamSetBuffer * b: mStreamSetOutputBuffers) {
     119        cacheName <<  ':' <<  b->getUniqueID();
     120    }
     121    mModule = new Module(cacheName.str(), idb->getContext());
    126122    prepareKernel(idb);
     123    return mModule;
     124}
     125
     126Module * Kernel::setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module) {
     127    assert (mModule == nullptr);
     128    mModule = module;
     129    prepareKernel(idb);
     130    return mModule;
    127131}
    128132
     
    132136        report_fatal_error("Cannot prepare kernel after kernel state finalized");
    133137    }
    134     if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
    135         std::string tmp;
    136         raw_string_ostream out(tmp);
    137         out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
    138             << mStreamSetInputs.size() << " input stream sets.";
    139         report_fatal_error(out.str());
    140     }
    141     if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
    142         std::string tmp;
    143         raw_string_ostream out(tmp);
    144         out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
    145             << mStreamSetOutputs.size() << " output stream sets.";
    146         report_fatal_error(out.str());
    147     }
    148138    const auto blockSize = idb->getBitBlockWidth();
     139    const auto requiredBlocks = codegen::SegmentSize + ((blockSize + mLookAheadPositions - 1) / blockSize);
     140
    149141    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    150         if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
    151             report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
     142        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < requiredBlocks)) {
     143            report_fatal_error(getName() + ": " + mStreamSetInputs[i].name + " requires buffer size " + std::to_string(requiredBlocks));
    152144        }
    153145        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
     
    164156        }
    165157    }
    166     for (const auto binding : mScalarInputs) {
     158    for (const auto & binding : mScalarInputs) {
    167159        addScalar(binding.type, binding.name);
    168160    }
    169     for (const auto binding : mScalarOutputs) {
     161    for (const auto & binding : mScalarOutputs) {
    170162        addScalar(binding.type, binding.name);
    171163    }
     
    173165        prepareStreamSetNameMap();
    174166    }
    175     for (auto binding : mInternalScalars) {
     167    for (const auto & binding : mInternalScalars) {
    176168        addScalar(binding.type, binding.name);
    177169    }
     
    257249    mAvailableItemCount.clear();
    258250    idb->CreateRetVoid();
    259     //CurrentMethod->dump();
    260251}
    261252
     
    605596void MultiBlockKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) {
    606597
    607     KernelBuilder * const iBuilder = kb.get();
    608     auto ip = iBuilder->saveIP();
     598    auto ip = kb->saveIP();
    609599    Function * const cp = mCurrentMethod;
    610600    const auto saveInstance = getInstance();
    611    
     601
    612602    // First prepare the multi-block method that will be used.
     603
     604    DataLayout DL(kb->getModule());
     605    IntegerType * const intAddressTy = DL.getIntPtrType(kb->getContext());
    613606
    614607    std::vector<Type *> multiBlockParmTypes;
    615608    multiBlockParmTypes.push_back(mKernelStateType->getPointerTo());
    616     multiBlockParmTypes.push_back(iBuilder->getSizeTy());
     609    multiBlockParmTypes.push_back(kb->getSizeTy());
    617610    for (auto buffer : mStreamSetInputBuffers) {
    618611        multiBlockParmTypes.push_back(buffer->getPointerType());
     
    621614        multiBlockParmTypes.push_back(buffer->getPointerType());
    622615    }
    623    
    624     FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), multiBlockParmTypes, false);
    625     Function * multiBlockFunction = Function::Create(type, GlobalValue::InternalLinkage, getName() + MULTI_BLOCK_SUFFIX, iBuilder->getModule());
     616
     617    FunctionType * const type = FunctionType::get(kb->getVoidTy(), multiBlockParmTypes, false);
     618    Function * multiBlockFunction = Function::Create(type, GlobalValue::InternalLinkage, getName() + MULTI_BLOCK_SUFFIX, kb->getModule());
    626619    multiBlockFunction->setCallingConv(CallingConv::C);
    627620    multiBlockFunction->setDoesNotThrow();
     
    640633    // provide the required multi-block kernel logic.
    641634    mCurrentMethod = multiBlockFunction;
    642     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "multiBlockEntry", multiBlockFunction, 0));
     635    kb->SetInsertPoint(BasicBlock::Create(kb->getContext(), "multiBlockEntry", multiBlockFunction, 0));
    643636    generateMultiBlockLogic(kb);
    644637
    645     iBuilder->CreateRetVoid();
    646    
    647     iBuilder->restoreIP(ip);
     638    kb->CreateRetVoid();
     639
     640    kb->restoreIP(ip);
    648641    mCurrentMethod = cp;
    649642    setInstance(saveInstance);
    650    
     643
    651644    // Now proceed with creation of the doSegment method.
    652645
    653     BasicBlock * const entry = iBuilder->GetInsertBlock();
    654     BasicBlock * const doSegmentOuterLoop = iBuilder->CreateBasicBlock(getName() + "_doSegmentOuterLoop");
    655     BasicBlock * const doMultiBlockCall = iBuilder->CreateBasicBlock(getName() + "_doMultiBlockCall");
    656     BasicBlock * const tempBlockCheck = iBuilder->CreateBasicBlock(getName() + "_tempBlockCheck");
    657     BasicBlock * const doTempBufferBlock = iBuilder->CreateBasicBlock(getName() + "_doTempBufferBlock");
    658     BasicBlock * const segmentDone = iBuilder->CreateBasicBlock(getName() + "_segmentDone");
    659 
    660     Value * blockBaseMask = iBuilder->CreateNot(iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
     646    BasicBlock * const entry = kb->GetInsertBlock();
     647    BasicBlock * const doSegmentOuterLoop = kb->CreateBasicBlock(getName() + "_doSegmentOuterLoop");
     648    BasicBlock * const doMultiBlockCall = kb->CreateBasicBlock(getName() + "_doMultiBlockCall");
     649    BasicBlock * const tempBlockCheck = kb->CreateBasicBlock(getName() + "_tempBlockCheck");
     650    BasicBlock * const doTempBufferBlock = kb->CreateBasicBlock(getName() + "_doTempBufferBlock");
     651    BasicBlock * const segmentDone = kb->CreateBasicBlock(getName() + "_segmentDone");
     652
     653    Value * blockBaseMask = kb->CreateNot(kb->getSize(kb->getBitBlockWidth() - 1));
    661654
    662655    //
     
    669662    //
    670663
    671     unsigned bitBlockWidth = iBuilder->getBitBlockWidth();
     664    unsigned bitBlockWidth = kb->getBitBlockWidth();
    672665    std::vector<Type *> tempBuffers;
    673666    std::vector<unsigned> itemsPerPrincipalBlock;
     
    692685        }
    693686    }
     687
    694688    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    695689        auto & rate = mStreamSetOutputs[i].rate;
     
    712706        }
    713707    }
    714     Type * tempParameterStructType = StructType::create(iBuilder->getContext(), tempBuffers);
    715     Value * tempParameterArea = iBuilder->CreateCacheAlignedAlloca(tempParameterStructType);
    716 
    717     ConstantInt * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
     708
     709    Type * tempParameterStructType = StructType::create(kb->getContext(), tempBuffers);
     710    Value * tempParameterArea = kb->CreateCacheAlignedAlloca(tempParameterStructType);
     711
     712    ConstantInt * blockSize = kb->getSize(kb->getBitBlockWidth());
    718713
    719714    Value * availablePos = mAvailableItemCount[0];
    720715    Value * itemsAvail = availablePos;
     716
    721717    //  Make sure that corresponding data is available depending on processing rate
    722718    //  for all input stream sets.
     719
    723720    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
    724721        Value * a = mAvailableItemCount[i];
    725722        auto & rate = mStreamSetInputs[i].rate;
    726723        assert (((rate.referenceStreamSet().empty()) || (rate.referenceStreamSet() == mStreamSetInputs[0].name)) && "Multiblock kernel input rate not with respect to principal stream.");
    727         Value * maxItems = rate.CreateMaxReferenceItemsCalculation(iBuilder, a);
    728         itemsAvail = iBuilder->CreateSelect(iBuilder->CreateICmpULT(itemsAvail, maxItems), itemsAvail, maxItems);
    729     }
    730 
    731     Value * processed = iBuilder->getProcessedItemCount(mStreamSetInputs[0].name);
    732     Value * itemsToDo = iBuilder->CreateSub(itemsAvail, processed);
    733     Value * fullBlocksToDo = iBuilder->CreateUDiv(itemsToDo, blockSize);
    734     Value * excessItems = iBuilder->CreateURem(itemsToDo, blockSize);
     724        Value * maxItems = rate.CreateMaxReferenceItemsCalculation(kb.get(), a);
     725        itemsAvail = kb->CreateSelect(kb->CreateICmpULT(itemsAvail, maxItems), itemsAvail, maxItems);
     726    }
     727
     728    Value * processed = kb->getProcessedItemCount(mStreamSetInputs[0].name);
     729    Value * itemsToDo = kb->CreateSub(itemsAvail, processed);
     730    Value * fullBlocksToDo = kb->CreateUDiv(itemsToDo, blockSize);
     731    Value * excessItems = kb->CreateURem(itemsToDo, blockSize);
    735732
    736733    //  Now we iteratively process these blocks using the doMultiBlock method.
     
    742739    //  produced.
    743740
    744     //iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(fullBlocksToDo, iBuilder->getSize(0)), doSegmentOuterLoop, finalBlockCheck);
    745    
    746     iBuilder->CreateBr(doSegmentOuterLoop);
    747     iBuilder->SetInsertPoint(doSegmentOuterLoop);
    748     PHINode * const blocksRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "blocksRemaining");
     741    kb->CreateBr(doSegmentOuterLoop);
     742    kb->SetInsertPoint(doSegmentOuterLoop);
     743    PHINode * const blocksRemaining = kb->CreatePHI(kb->getSizeTy(), 2, "blocksRemaining");
    749744    blocksRemaining->addIncoming(fullBlocksToDo, entry);
     745
    750746    // For each input buffer, determine the processedItemCount, the block pointer for the
    751747    // buffer block containing the next item, and the number of linearly available items.
    752     //
     748
    753749    std::vector<Value *> processedItemCount;
    754750    std::vector<Value *> inputBlockPtr;
     
    758754    //  Now determine the linearly available blocks, based on blocks remaining reduced
    759755    //  by limitations of linearly available input buffer space.
     756
    760757    Value * linearlyAvailBlocks = blocksRemaining;
    761758    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    762         Value * p = iBuilder->getProcessedItemCount(mStreamSetInputs[i].name);
    763         Value * blkNo = iBuilder->CreateUDiv(p, blockSize);
    764         Value * b = iBuilder->getInputStreamBlockPtr(mStreamSetInputs[i].name, iBuilder->getInt32(0));
     759        Value * p = kb->getProcessedItemCount(mStreamSetInputs[i].name);
     760        Value * blkNo = kb->CreateUDiv(p, blockSize);
     761        Value * b = kb->getInputStreamBlockPtr(mStreamSetInputs[i].name, kb->getInt32(0));
    765762        processedItemCount.push_back(p);
    766763        inputBlockPtr.push_back(b);
     
    768765        Value * blocks = nullptr;
    769766        if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator()) && (rate.referenceStreamSet() == "")) {
    770             blocks = mStreamSetInputBuffers[i]->getLinearlyAccessibleBlocks(iBuilder, blkNo);
     767            blocks = mStreamSetInputBuffers[i]->getLinearlyAccessibleBlocks(kb.get(), blkNo);
    771768        } else {
    772             Value * linearlyAvailItems = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(iBuilder, p);
    773             Value * items = rate.CreateMaxReferenceItemsCalculation(iBuilder, linearlyAvailItems);
    774             blocks = iBuilder->CreateUDiv(items, blockSize);
    775         }
    776         linearlyAvailBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyAvailBlocks), blocks, linearlyAvailBlocks);
     769            Value * linearlyAvailItems = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(kb.get(), p);
     770            Value * items = rate.CreateMaxReferenceItemsCalculation(kb.get(), linearlyAvailItems);
     771            blocks = kb->CreateUDiv(items, blockSize);
     772        }
     773        linearlyAvailBlocks = kb->CreateSelect(kb->CreateICmpULT(blocks, linearlyAvailBlocks), blocks, linearlyAvailBlocks);
    777774    }
    778775    //  Now determine the linearly writeable blocks, based on available blocks reduced
     
    781778
    782779    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    783         Value * p = iBuilder->getProducedItemCount(mStreamSetOutputs[i].name);
    784         Value * blkNo = iBuilder->CreateUDiv(p, blockSize);
    785         Value * b = iBuilder->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, iBuilder->getInt32(0));
     780        Value * p = kb->getProducedItemCount(mStreamSetOutputs[i].name);
     781        Value * blkNo = kb->CreateUDiv(p, blockSize);
     782        Value * b = kb->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, kb->getInt32(0));
    786783        producedItemCount.push_back(p);
    787784        outputBlockPtr.push_back(b);
     
    789786        Value * blocks = nullptr;
    790787        if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator())) {
    791             blocks = mStreamSetOutputBuffers[0]->getLinearlyWritableBlocks(iBuilder, blkNo);
     788            blocks = mStreamSetOutputBuffers[0]->getLinearlyWritableBlocks(kb.get(), blkNo);
    792789        } else {
    793             Value * writableItems = mStreamSetOutputBuffers[0]->getLinearlyWritableItems(iBuilder, p);
    794             blocks = iBuilder->CreateUDiv(writableItems, blockSize);
    795         }
    796         linearlyWritableBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyWritableBlocks), blocks, linearlyWritableBlocks);
    797     }
    798     Value * haveBlocks = iBuilder->CreateICmpUGT(linearlyWritableBlocks, iBuilder->getSize(0));
    799     iBuilder->CreateCondBr(haveBlocks, doMultiBlockCall, tempBlockCheck);
     790            Value * writableItems = mStreamSetOutputBuffers[0]->getLinearlyWritableItems(kb.get(), p);
     791            blocks = kb->CreateUDiv(writableItems, blockSize);
     792        }
     793        linearlyWritableBlocks = kb->CreateSelect(kb->CreateICmpULT(blocks, linearlyWritableBlocks), blocks, linearlyWritableBlocks);
     794    }
     795    Value * haveBlocks = kb->CreateICmpUGT(linearlyWritableBlocks, kb->getSize(0));
     796    kb->CreateCondBr(haveBlocks, doMultiBlockCall, tempBlockCheck);
    800797
    801798    //  At this point we have verified the availability of one or more blocks of input data and output buffer space for all stream sets.
    802799    //  Now prepare the doMultiBlock call.
    803     iBuilder->SetInsertPoint(doMultiBlockCall);
    804 
    805     Value * linearlyAvailItems = iBuilder->CreateMul(linearlyWritableBlocks, blockSize);
     800    kb->SetInsertPoint(doMultiBlockCall);
     801
     802    Value * linearlyAvailItems = kb->CreateMul(linearlyWritableBlocks, blockSize);
    806803
    807804    std::vector<Value *> doMultiBlockArgs;
     
    809806    doMultiBlockArgs.push_back(linearlyAvailItems);
    810807    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    811         Value * bufPtr = iBuilder->getRawInputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]);
    812         bufPtr = iBuilder->CreatePointerCast(bufPtr, mStreamSetInputBuffers[i]->getPointerType());
     808        Value * bufPtr = kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), processedItemCount[i]);
     809        bufPtr = kb->CreatePointerCast(bufPtr, mStreamSetInputBuffers[i]->getPointerType());
    813810        doMultiBlockArgs.push_back(bufPtr);
    814811    }
    815812    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    816         Value * bufPtr = iBuilder->getRawOutputPointer(mStreamSetOutputs[i].name, iBuilder->getInt32(0), producedItemCount[i]);
    817         bufPtr = iBuilder->CreatePointerCast(bufPtr, mStreamSetOutputBuffers[i]->getPointerType());
     813        Value * bufPtr = kb->getRawOutputPointer(mStreamSetOutputs[i].name, kb->getInt32(0), producedItemCount[i]);
     814        bufPtr = kb->CreatePointerCast(bufPtr, mStreamSetOutputBuffers[i]->getPointerType());
    818815        doMultiBlockArgs.push_back(bufPtr);
    819816    }
    820817
    821     iBuilder->CreateCall(multiBlockFunction, doMultiBlockArgs);
     818    kb->CreateCall(multiBlockFunction, doMultiBlockArgs);
    822819    // Do copybacks if necessary.
    823820    unsigned priorIdx = 0;
    824821    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    825         Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
     822        Value * log2BlockSize = kb->getSize(std::log2(kb->getBitBlockWidth()));
    826823        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    827             BasicBlock * copyBack = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    828             BasicBlock * done = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
    829             Value * newlyProduced = iBuilder->CreateSub(iBuilder->getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
    830             Value * priorBlock = iBuilder->CreateLShr(producedItemCount[i], log2BlockSize);
    831             Value * priorOffset = iBuilder->CreateAnd(producedItemCount[i], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
    832             Value * instance = iBuilder->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
    833             Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(iBuilder, priorBlock);
    834             Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
    835             Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
    836             iBuilder->CreateCondBr(wraparound, copyBack, done);
    837             iBuilder->SetInsertPoint(copyBack);
    838             Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
    839             cb->createCopyBack(iBuilder, instance, copyItems);
    840             iBuilder->CreateBr(done);
    841             iBuilder->SetInsertPoint(done);
     824            BasicBlock * copyBack = kb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
     825            BasicBlock * done = kb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     826            Value * newlyProduced = kb->CreateSub(kb->getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
     827            Value * priorBlock = kb->CreateLShr(producedItemCount[i], log2BlockSize);
     828            Value * priorOffset = kb->CreateAnd(producedItemCount[i], kb->getSize(kb->getBitBlockWidth() - 1));
     829            Value * instance = kb->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
     830            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(kb.get(), priorBlock);
     831            Value * accessible = kb->CreateSub(kb->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
     832            Value * wraparound = kb->CreateICmpULT(accessible, newlyProduced);
     833            kb->CreateCondBr(wraparound, copyBack, done);
     834            kb->SetInsertPoint(copyBack);
     835            Value * copyItems = kb->CreateSub(newlyProduced, accessible);
     836            cb->createCopyBack(kb.get(), instance, copyItems);
     837            kb->CreateBr(done);
     838            kb->SetInsertPoint(done);
    842839            priorIdx++;
    843840        }
    844841        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    845             BasicBlock * copyBack = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    846             BasicBlock * done = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
    847             Value * instance = iBuilder->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
    848             Value * newlyProduced = iBuilder->CreateSub(iBuilder->getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
    849             Value * accessible = cb->getLinearlyAccessibleItems(iBuilder, producedItemCount[i]);
    850             Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
    851             iBuilder->CreateCondBr(wraparound, copyBack, done);
    852             iBuilder->SetInsertPoint(copyBack);
    853             Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
    854             cb->createCopyBack(iBuilder, instance, copyItems);
    855             iBuilder->CreateBr(done);
    856             iBuilder->SetInsertPoint(done);
     842            BasicBlock * copyBack = kb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
     843            BasicBlock * done = kb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     844            Value * instance = kb->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
     845            Value * newlyProduced = kb->CreateSub(kb->getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
     846            Value * accessible = cb->getLinearlyAccessibleItems(kb.get(), producedItemCount[i]);
     847            Value * wraparound = kb->CreateICmpULT(accessible, newlyProduced);
     848            kb->CreateCondBr(wraparound, copyBack, done);
     849            kb->SetInsertPoint(copyBack);
     850            Value * copyItems = kb->CreateSub(newlyProduced, accessible);
     851            cb->createCopyBack(kb.get(), instance, copyItems);
     852            kb->CreateBr(done);
     853            kb->SetInsertPoint(done);
    857854            priorIdx++;
    858855        }
    859856    }
    860     iBuilder->setProcessedItemCount(mStreamSetInputs[0].name, iBuilder->CreateAdd(processed, linearlyAvailItems));
    861     Value * reducedBlocksToDo = iBuilder->CreateSub(blocksRemaining, linearlyWritableBlocks);
    862     Value * fullBlocksRemain = iBuilder->CreateICmpUGT(reducedBlocksToDo, iBuilder->getSize(0));
    863     BasicBlock * multiBlockFinal = iBuilder->GetInsertBlock();
     857    kb->setProcessedItemCount(mStreamSetInputs[0].name, kb->CreateAdd(processed, linearlyAvailItems));
     858    Value * reducedBlocksToDo = kb->CreateSub(blocksRemaining, linearlyWritableBlocks);
     859    Value * fullBlocksRemain = kb->CreateICmpUGT(reducedBlocksToDo, kb->getSize(0));
     860    BasicBlock * multiBlockFinal = kb->GetInsertBlock();
    864861    blocksRemaining->addIncoming(reducedBlocksToDo, multiBlockFinal);
    865     iBuilder->CreateCondBr(fullBlocksRemain, doSegmentOuterLoop, tempBlockCheck);
     862    kb->CreateCondBr(fullBlocksRemain, doSegmentOuterLoop, tempBlockCheck);
    866863    //iBuilder->CreateBr(doSegmentOuterLoop);
    867864    //
     
    874871    //
    875872
    876     iBuilder->SetInsertPoint(tempBlockCheck);
    877     PHINode * const tempBlocksRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "tempBlocksRemain");
     873    kb->SetInsertPoint(tempBlockCheck);
     874    PHINode * const tempBlocksRemain = kb->CreatePHI(kb->getSizeTy(), 2, "tempBlocksRemain");
    878875    tempBlocksRemain->addIncoming(blocksRemaining, doSegmentOuterLoop);
    879876    tempBlocksRemain->addIncoming(reducedBlocksToDo, multiBlockFinal);
    880    
    881     haveBlocks = iBuilder->CreateICmpUGT(tempBlocksRemain, iBuilder->getSize(0));
    882     iBuilder->CreateCondBr(iBuilder->CreateOr(mIsFinal, haveBlocks), doTempBufferBlock, segmentDone);
     877
     878    haveBlocks = kb->CreateICmpUGT(tempBlocksRemain, kb->getSize(0));
     879    kb->CreateCondBr(kb->CreateOr(mIsFinal, haveBlocks), doTempBufferBlock, segmentDone);
    883880
    884881    //
     
    890887    // call the Multiblock routine.
    891888    //
    892     iBuilder->SetInsertPoint(doTempBufferBlock);
    893     Value * tempBlockItems = iBuilder->CreateSelect(haveBlocks, blockSize, excessItems);
     889    kb->SetInsertPoint(doTempBufferBlock);
     890    Value * tempBlockItems = kb->CreateSelect(haveBlocks, blockSize, excessItems);
    894891
    895892    // Begin constructing the doMultiBlock args.
     
    901898    //
    902899    // First zero it out.
    903     Constant * const tempAreaSize = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(tempParameterStructType), iBuilder->getSizeTy(), false);
    904     iBuilder->CreateMemZero(tempParameterArea, tempAreaSize);
    905    
     900    Constant * const tempAreaSize = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(tempParameterStructType), kb->getSizeTy(), false);
     901    kb->CreateMemZero(tempParameterArea, tempAreaSize);
     902
    906903    // For each input and output buffer, copy over necessary data starting from the last
    907904    // block boundary.
    908905    std::vector<Value *> finalItemPos;
    909     finalItemPos.push_back(iBuilder->CreateAdd(processedItemCount[0], tempBlockItems));
     906    finalItemPos.push_back(kb->CreateAdd(processedItemCount[0], tempBlockItems));
    910907
    911908    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); i++) {
    912         Value * tempBufPtr = iBuilder->CreateGEP(tempParameterArea, iBuilder->getInt32(i));
    913         tempBufPtr = iBuilder->CreatePointerCast(tempBufPtr, mStreamSetInputBuffers[i]->getPointerType());
    914 
    915         Value * blockItemPos = iBuilder->CreateAnd(processedItemCount[i], blockBaseMask);
     909        Value * tempBufPtr = kb->CreateGEP(tempParameterArea, kb->getInt32(i));
     910        tempBufPtr = kb->CreatePointerCast(tempBufPtr, mStreamSetInputBuffers[i]->getPointerType());
     911
     912        Value * blockItemPos = kb->CreateAnd(processedItemCount[i], blockBaseMask);
    916913
    917914        // The number of items to copy is determined by the processing rate requirements.
     
    920917            std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
    921918            if (refSet.empty()) {
    922                 finalItemPos.push_back(rate.CreateRatioCalculation(iBuilder, finalItemPos[0], iBuilder->CreateNot(haveBlocks)));
     919                finalItemPos.push_back(rate.CreateRatioCalculation(kb.get(), finalItemPos[0], kb->CreateNot(haveBlocks)));
    923920            }
    924921            else {
     
    926923                std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
    927924                assert (port == Port::Input && ssIdx < i);
    928                 finalItemPos.push_back(rate.CreateRatioCalculation(iBuilder, finalItemPos[ssIdx], iBuilder->CreateNot(haveBlocks)));
     925                finalItemPos.push_back(rate.CreateRatioCalculation(kb.get(), finalItemPos[ssIdx], kb->CreateNot(haveBlocks)));
    929926            }
    930927        }
    931         Value * neededItems = iBuilder->CreateSub(finalItemPos[i], blockItemPos);
    932         Value * availFromBase = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(iBuilder, blockItemPos);
    933         Value * copyItems1 = iBuilder->CreateSelect(iBuilder->CreateICmpULT(neededItems, availFromBase), neededItems, availFromBase);
    934         Value * copyItems2 = iBuilder->CreateSub(neededItems, copyItems1);
    935         Value * inputPtr = iBuilder->getInputStreamBlockPtr(mStreamSetInputs[i].name, iBuilder->getInt32(0));
    936         mStreamSetInputBuffers[i]->createBlockAlignedCopy(iBuilder, tempBufPtr, inputPtr, copyItems1);
    937         Value * nextBufPtr = iBuilder->CreateGEP(tempBufPtr, iBuilder->CreateUDiv(availFromBase, blockSize));
    938         mStreamSetInputBuffers[i]->createBlockAlignedCopy(iBuilder, nextBufPtr, iBuilder->getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
    939         Value * itemAddress = iBuilder->CreatePtrToInt(iBuilder->getRawInputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]), iBuilder->getSizeTy());
    940         Value * baseAddress = iBuilder->CreatePtrToInt(inputBlockPtr[i], iBuilder->getSizeTy());
    941         Value * tempAddress = iBuilder->CreateAdd(iBuilder->CreatePtrToInt(tempBufPtr, iBuilder->getSizeTy()), iBuilder->CreateSub(itemAddress, baseAddress));
    942         tempArgs.push_back(iBuilder->CreateIntToPtr(tempAddress, mStreamSetInputBuffers[i]->getPointerType()));
     928        Value * neededItems = kb->CreateSub(finalItemPos[i], blockItemPos);
     929        Value * availFromBase = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(kb.get(), blockItemPos);
     930        Value * copyItems1 = kb->CreateSelect(kb->CreateICmpULT(neededItems, availFromBase), neededItems, availFromBase);
     931        Value * copyItems2 = kb->CreateSub(neededItems, copyItems1);
     932        Value * inputPtr = kb->getInputStreamBlockPtr(mStreamSetInputs[i].name, kb->getInt32(0));
     933        mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, inputPtr, copyItems1);
     934        Value * nextBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(availFromBase, blockSize));
     935        mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), nextBufPtr, kb->getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
     936
     937        Value * itemAddress = kb->CreatePtrToInt(kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), processedItemCount[i]), intAddressTy);
     938        Value * baseAddress = kb->CreatePtrToInt(inputBlockPtr[i], intAddressTy);
     939        Value * tempAddress = kb->CreateAdd(kb->CreatePtrToInt(tempBufPtr, kb->getSizeTy()), kb->CreateSub(itemAddress, baseAddress));
     940        tempArgs.push_back(kb->CreateIntToPtr(tempAddress, mStreamSetInputBuffers[i]->getPointerType()));
    943941    }
    944942
    945943    std::vector<Value *> blockItemPos;
    946944    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {
    947         Value * tempBufPtr = iBuilder->CreateGEP(tempParameterArea, iBuilder->getInt32(mStreamSetInputs.size() + i));
    948         tempBufPtr = iBuilder->CreatePointerCast(tempBufPtr, mStreamSetOutputBuffers[i]->getPointerType());
    949         blockItemPos.push_back(iBuilder->CreateAnd(producedItemCount[i], blockBaseMask));
    950         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, tempBufPtr, outputBlockPtr[i], iBuilder->CreateSub(producedItemCount[i], blockItemPos[i]));
    951         Value * itemAddress = iBuilder->CreatePtrToInt(iBuilder->getRawOutputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), producedItemCount[i]), iBuilder->getSizeTy());
    952         Value * outputPtr = iBuilder->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, iBuilder->getInt32(0));
    953         Value * baseAddress = iBuilder->CreatePtrToInt(outputPtr, iBuilder->getSizeTy());
    954         Value * tempAddress = iBuilder->CreateAdd(iBuilder->CreatePtrToInt(tempBufPtr, iBuilder->getSizeTy()), iBuilder->CreateSub(itemAddress, baseAddress));
    955         tempArgs.push_back(iBuilder->CreateIntToPtr(tempAddress, mStreamSetOutputBuffers[i]->getPointerType()));
    956     }
    957 
    958    
    959     iBuilder->CreateCall(multiBlockFunction, tempArgs);
     945        Value * tempBufPtr = kb->CreateGEP(tempParameterArea, kb->getInt32(mStreamSetInputs.size() + i));
     946        tempBufPtr = kb->CreatePointerCast(tempBufPtr, mStreamSetOutputBuffers[i]->getPointerType());
     947        blockItemPos.push_back(kb->CreateAnd(producedItemCount[i], blockBaseMask));
     948        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, outputBlockPtr[i], kb->CreateSub(producedItemCount[i], blockItemPos[i]));
     949        Value * itemAddress = kb->CreatePtrToInt(kb->getRawOutputPointer(mStreamSetInputs[i].name, kb->getInt32(0), producedItemCount[i]), kb->getSizeTy());
     950        Value * outputPtr = kb->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, kb->getInt32(0));
     951        Value * baseAddress = kb->CreatePtrToInt(outputPtr, intAddressTy);
     952        Value * tempAddress = kb->CreateAdd(kb->CreatePtrToInt(tempBufPtr, intAddressTy), kb->CreateSub(itemAddress, baseAddress));
     953        tempArgs.push_back(kb->CreateIntToPtr(tempAddress, mStreamSetOutputBuffers[i]->getPointerType()));
     954    }
     955
     956
     957    kb->CreateCall(multiBlockFunction, tempArgs);
    960958
    961959    // Copy back data to the actual output buffers.
    962960
    963961    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {
    964         Value * tempBufPtr = iBuilder->CreateGEP(tempParameterArea, iBuilder->getInt32(mStreamSetInputs.size() + i));
    965         tempBufPtr = iBuilder->CreatePointerCast(tempBufPtr, mStreamSetOutputBuffers[i]->getPointerType());
    966         Value * final_items = iBuilder->getProducedItemCount(mStreamSetOutputs[i].name);
    967         Value * copyItems = iBuilder->CreateSub(final_items, blockItemPos[i]);
    968         Value * copyItems1 = mStreamSetOutputBuffers[i]->getLinearlyWritableItems(iBuilder, blockItemPos[i]); // must be a whole number of blocks.
    969         Value * outputPtr = iBuilder->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, iBuilder->getInt32(0));
    970         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, outputPtr, tempBufPtr, copyItems1);
    971         Value * copyItems2 = iBuilder->CreateSelect(iBuilder->CreateICmpULT(copyItems, copyItems), iBuilder->getSize(0), iBuilder->CreateSub(copyItems, copyItems1));
    972         tempBufPtr = iBuilder->CreateGEP(tempBufPtr, iBuilder->CreateUDiv(copyItems1, blockSize));
    973         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, iBuilder->getStreamSetBufferPtr(mStreamSetOutputs[i].name), tempBufPtr, copyItems2);
    974     }
    975 
    976     iBuilder->setProcessedItemCount(mStreamSetInputs[0].name, finalItemPos[0]);
     962        Value * tempBufPtr = kb->CreateGEP(tempParameterArea, kb->getInt32(mStreamSetInputs.size() + i));
     963        tempBufPtr = kb->CreatePointerCast(tempBufPtr, mStreamSetOutputBuffers[i]->getPointerType());
     964        Value * final_items = kb->getProducedItemCount(mStreamSetOutputs[i].name);
     965        Value * copyItems = kb->CreateSub(final_items, blockItemPos[i]);
     966        Value * copyItems1 = mStreamSetOutputBuffers[i]->getLinearlyWritableItems(kb.get(), blockItemPos[i]); // must be a whole number of blocks.
     967        Value * outputPtr = kb->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, kb->getInt32(0));
     968        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), outputPtr, tempBufPtr, copyItems1);
     969        Value * copyItems2 = kb->CreateSelect(kb->CreateICmpULT(copyItems, copyItems), kb->getSize(0), kb->CreateSub(copyItems, copyItems1));
     970        tempBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(copyItems1, blockSize));
     971        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), kb->getStreamSetBufferPtr(mStreamSetOutputs[i].name), tempBufPtr, copyItems2);
     972    }
     973
     974    kb->setProcessedItemCount(mStreamSetInputs[0].name, finalItemPos[0]);
    977975
    978976    //  We've dealt with the partial block processing and copied information back into the
    979977    //  actual buffers.  If this isn't the final block, loop back for more multiblock processing.
    980978    //
    981     blocksRemaining->addIncoming(iBuilder->CreateSub(tempBlocksRemain, iBuilder->CreateZExt(haveBlocks, iBuilder->getSizeTy())), iBuilder->GetInsertBlock());
    982     iBuilder->CreateCondBr(haveBlocks, doSegmentOuterLoop, segmentDone);
    983     iBuilder->SetInsertPoint(segmentDone);
     979    blocksRemaining->addIncoming(kb->CreateSub(tempBlocksRemain, kb->CreateZExt(haveBlocks, kb->getSizeTy())), kb->GetInsertBlock());
     980    kb->CreateCondBr(haveBlocks, doSegmentOuterLoop, segmentDone);
     981    kb->SetInsertPoint(segmentDone);
    984982}
    985983
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5441 r5446  
    8383    //
    8484
    85     void createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
     85    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
     86
     87    llvm::Module * makeModule(const std::unique_ptr<KernelBuilder> & idb);
     88
     89    llvm::Module * setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module);
    8690
    8791    void createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, llvm::Module * const kernelModule);
     
    182186        assert (port.first == Port::Input);
    183187        assert (port.second < mStreamSetInputBuffers.size());
     188        assert (mStreamSetInputBuffers[port.second]);
    184189        return mStreamSetInputBuffers[port.second];
    185190    }
     
    189194        assert (port.first == Port::Output);
    190195        assert (port.second < mStreamSetOutputBuffers.size());
     196        assert (mStreamSetOutputBuffers[port.second]);
    191197        return mStreamSetOutputBuffers[port.second];
    192198    }
     
    197203        if (port == Port::Input) {
    198204            assert (index < mStreamSetInputBuffers.size());
     205            assert (mStreamSetInputBuffers[index]);
    199206            return mStreamSetInputBuffers[index];
    200207        } else {
    201208            assert (index < mStreamSetOutputBuffers.size());
     209            assert (mStreamSetOutputBuffers[index]);
    202210            return mStreamSetOutputBuffers[index];
    203211        }
     
    219227    std::vector<llvm::Value *>          mAvailableItemCount;
    220228    llvm::Value *                       mOutputScalarResult;
    221 
    222229
    223230    std::vector<llvm::Type *>           mKernelFields;
     
    378385    // method of the multi-block kernel builder makes all the necessary arrangements
    379386    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
    380     void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
     387    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) final;
    381388
    382389};
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5440 r5446  
    1111
    1212Value * KernelBuilder::getScalarFieldPtr(llvm::Value * instance, Value * const index) {
     13    assert (instance);
     14    CreateAssert(instance, "instance cannot be null!");
    1315    return CreateGEP(instance, {getInt32(0), index});
    1416}
     
    3537
    3638Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) {
    37     return getScalarField(name + Kernel::BUFFER_PTR_SUFFIX);
     39    Value * const ptr = getScalarField(name + Kernel::BUFFER_PTR_SUFFIX);
     40    CreateAssert(ptr, name + " cannot be null!");
     41    return ptr;
    3842}
    3943
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r5440 r5446  
    104104    void setConsumerLock(const std::string & name, llvm::Value * value);
    105105
    106     Kernel * getKernel() const {
     106    const Kernel * getKernel() const {
    107107        return mKernel;
    108108    }
    109109
    110     void setKernel(Kernel * const kernel) {
     110    void setKernel(const Kernel * const kernel) {
    111111        mKernel = kernel;
    112112    }
     
    128128
    129129protected:
    130     Kernel * mKernel;
     130    const Kernel * mKernel;
    131131};
    132132
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5445 r5446  
    9292 */
    9393Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
    94     Value * ptr = getBaseAddress(iBuilder, self);
    95 
    96     if (!isa<ConstantInt>(streamIndex) || !cast<ConstantInt>(streamIndex)->isZero()) {
    97         ptr = iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), streamIndex});
    98     }
    99     Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
     94    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
     95    Value * relativePosition = absolutePosition;
    10096    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
    10197    if (bw < 8) {
     98        assert (bw  == 1 || bw == 2 || bw == 4);
    10299        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
    103100        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
    104     }
    105     else {
     101    } else {
    106102        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
    107103    }
    108     Value * rawPointer = iBuilder->CreateGEP(ptr, relativePosition);
    109     return rawPointer;
     104    return iBuilder->CreateGEP(ptr, relativePosition);
    110105}
    111106
     
    133128}
    134129
    135 Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const /* iBuilder */, Value * self) const {
     130Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
     131    iBuilder->CreateAssert(self, "StreamSetBuffer base address cannot be 0");
    136132    return self;
    137133}
     
    200196
    201197Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * const self) const {
     198    iBuilder->CreateAssert(self, "SourceBuffer: instance cannot be null");
    202199    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    203200    Value * const addr = iBuilder->CreateLoad(ptr);
     201    iBuilder->CreateAssert(addr, "SourceBuffer: base address cannot be 0");
    204202    return addr;
    205203}
     
    231229}
    232230
     231Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
     232    Value * ptr = iBuilder->CreateGEP(getBaseAddress(iBuilder, self), {iBuilder->getInt32(0), streamIndex});
     233    Value * relativePosition = iBuilder->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * iBuilder->getBitBlockWidth()));
     234    const auto bw = mBaseType->getArrayElementType()->getScalarSizeInBits();
     235    if (bw < 8) {
     236        assert (bw  == 1 || bw == 2 || bw == 4);
     237        relativePosition = iBuilder->CreateUDiv(relativePosition, ConstantInt::get(relativePosition->getType(), 8 / bw));
     238        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
     239    } else {
     240        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getIntNTy(bw)->getPointerTo());
     241    }
     242    return iBuilder->CreateGEP(ptr, relativePosition);
     243}
     244
    233245// CircularCopybackBuffer Buffer
    234246void CircularCopybackBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     
    239251    Value * overFlowAreaPtr = iBuilder->CreateGEP(self, iBuilder->getSize(mBufferBlocks));
    240252    createBlockAlignedCopy(iBuilder, self, overFlowAreaPtr, overFlowItems);
    241 }
    242 
    243 Value * CircularCopybackBuffer::getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * blockIndex) const {
    244     return iBuilder->CreateGEP(getBaseAddress(iBuilder, self), modByBufferBlocks(iBuilder, blockIndex));
    245253}
    246254
     
    447455
    448456Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
    449     return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
     457    iBuilder->CreateAssert(self, "ExpandableBuffer: instance cannot be null");
     458    Value * const baseAddr = iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
     459    iBuilder->CreateAssert(self, "ExpandableBuffer: base address cannot be 0");
     460    return baseAddr;
    450461}
    451462
     
    469480
    470481SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
    471 : StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), nullptr), 0, StructAddressSpace) {
     482: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), nullptr), 1, StructAddressSpace) {
    472483    mUniqueID = "B";
    473484    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
     
    477488
    478489ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
    479 : StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
     490: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 1, AddressSpace) {
    480491    mUniqueID = "E";
    481492    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
     
    489500}
    490501
     502CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
     503: StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
     504
     505}
     506
    491507CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
    492 : StreamSetBuffer(BufferKind::CircularCopybackBuffer, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks) {
     508: CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace)
     509, mOverflowBlocks(overflowBlocks) {
    493510    mUniqueID = "CC" + std::to_string(bufferBlocks);
    494511    if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
     
    514531}
    515532
    516 inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned blocks, unsigned AddressSpace)
     533inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace)
    517534: mBufferKind(k)
    518535, mType(resolvedType)
    519 , mBufferBlocks(blocks)
     536, mBufferBlocks(BufferBlocks)
    520537, mAddressSpace(AddressSpace)
    521538, mStreamSetBufferPtr(nullptr)
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5440 r5446  
    6868    virtual llvm::Value * getStreamSetCount(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self) const;
    6969
    70     llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
     70    virtual llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    7171
    7272    virtual void setBaseAddress(IDISA::IDISA_Builder * const iBuilder, llvm::Value * addr, llvm::Value *) const;
     
    9999protected:
    100100
    101     StreamSetBuffer(BufferKind k, llvm::Type * baseType, llvm::Type * resolvedType, unsigned blocks, unsigned AddressSpace);
     101    StreamSetBuffer(BufferKind k, llvm::Type * baseType, llvm::Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace);
    102102
    103103    // Get the buffer pointer for a given block of the stream.
     
    186186};
    187187
    188 class CircularBuffer final : public StreamSetBuffer {
     188class CircularBuffer : public StreamSetBuffer {
    189189public:
    190190    static inline bool classof(const StreamSetBuffer * b) {
     
    194194    CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace = 0);
    195195
    196 protected:
    197     llvm::Value * getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * blockIndex) const override;
     196    llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * streamIndex, llvm::Value * absolutePosition) const final;
     197
     198protected:
     199
     200    CircularBuffer(const BufferKind kind, const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace = 0);
     201
     202    llvm::Value * getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * blockIndex) const final;
    198203};
    199204   
     
    206211//  Kernels that read from a CircularCopybackBuffer must not access the overflow area.
    207212//
    208 class CircularCopybackBuffer final : public StreamSetBuffer {
     213class CircularCopybackBuffer final : public CircularBuffer {
    209214public:
    210215    static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::CircularCopybackBuffer;}
     
    221226    llvm::Value * getLinearlyWritableBlocks(IDISA::IDISA_Builder * const iBuilder, llvm::Value * fromBlock) const override;
    222227   
    223 protected:
    224     llvm::Value * getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * blockIndex) const override;
    225228private:
    226229    size_t mOverflowBlocks;
  • icGREP/icgrep-devel/icgrep/lz4d.cpp

    r5440 r5446  
    7373    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
    7474
    75     StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
     75    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * bufferSegments));
    7676    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
    7777    StreamSetBuffer * const Extenders = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     
    8181
    8282   
    83     kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(iBuilder, iBuilder->getInt8PtrTy(), segmentSize));
     83    kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<MemorySourceKernel>(iBuilder, iBuilder->getInt8PtrTy(), segmentSize));
    8484    sourceK->setInitialArguments({inputStream, fileSize});
    8585    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
     
    100100
    101101    Kernel * outK = pxDriver.addKernelInstance(make_unique<FileSink>(iBuilder, 8));
    102     outK->setInitialArguments({iBuilder->CreatePointerCast(iBuilder->GetString(outputFile), iBuilder->getInt8PtrTy())});
     102    outK->setInitialArguments({iBuilder->GetString(outputFile)});
    103103    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    104104 
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.cpp

    r5440 r5446  
    2424#include <pablo/ps_assign.h>
    2525#include <pablo/pablo_kernel.h>
    26 #include <kernels/kernel_builder.h>
     26#include <IR_Gen/idisa_builder.h>
     27#include <llvm/IR/Module.h>
    2728#include <llvm/Support/raw_os_ostream.h>
    2829
     
    4445
    4546Count * PabloBlock::createCount(PabloAST * expr) {
    46     Type * type = getParent()->getBuilder()->getSizeTy();
     47    IntegerType * const type = getParent()->getSizeTy();
    4748    return insertAtInsertionPoint(new (mAllocator) Count(expr, makeName("count"), type, mAllocator));
    4849}
    4950
    5051Count * PabloBlock::createCount(PabloAST * const expr, const llvm::StringRef & prefix)  {
    51     Type * type = getParent()->getBuilder()->getSizeTy();
     52    IntegerType * const type = getParent()->getSizeTy();
    5253    return insertAtInsertionPoint(new (mAllocator) Count(expr, makeName(prefix), type, mAllocator));
    5354}
     
    6364Var * PabloBlock::createVar(PabloAST * name, Type * type) {
    6465    if (type == nullptr) {
    65         type = getParent()->getBuilder()->getStreamTy();
     66        type = getParent()->getStreamTy();
    6667    }
    6768    if (LLVM_UNLIKELY(name == nullptr || !isa<String>(name))) {
     
    186187LessThan * PabloBlock::createLessThan(PabloAST * expr1, PabloAST * expr2) {
    187188    CHECK_SAME_TYPE(expr1, expr2);
    188     Type * type = getParent()->getBuilder()->getInt1Ty();
    189     return new (mAllocator) LessThan(type, expr1, expr2, mAllocator);
     189    IntegerType * const int1Ty = getParent()->getInt1Ty();
     190    return new (mAllocator) LessThan(int1Ty, expr1, expr2, mAllocator);
    190191}
    191192
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5440 r5446  
    9393
    9494Zeroes * PabloKernel::getNullValue(Type * type) {
    95     if (type == nullptr) {
    96         type = mBuilder->getStreamTy();
     95    if (LLVM_LIKELY(type == nullptr)) {
     96        type = getStreamTy();
    9797    }
    9898    for (PabloAST * constant : mConstants) {
     
    107107
    108108Ones * PabloKernel::getAllOnesValue(Type * type) {
    109     if (type == nullptr) {
    110         type = mBuilder->getStreamTy();
     109    if (LLVM_LIKELY(type == nullptr)) {
     110        type = getStreamTy();
    111111    }
    112112    for (PabloAST * constant : mConstants) {
     
    124124        setName(getName() + "_DumpTrace");
    125125    }
    126     mBuilder = iBuilder.get();
    127     generatePabloMethod();
    128     mBuilder = nullptr;
     126    mSizeTy = iBuilder->getSizeTy();
     127    mStreamTy = iBuilder->getStreamTy();
     128    generatePabloMethod();   
    129129    pablo_function_passes(this);
    130130    mPabloCompiler->initializeKernelData(iBuilder);
     131    mSizeTy = nullptr;
     132    mStreamTy = nullptr;
    131133    BlockOrientedKernel::prepareKernel(iBuilder);
    132134}
    133135
    134136void PabloKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    135     mBuilder = iBuilder.get();
     137    mSizeTy = iBuilder->getSizeTy();
     138    mStreamTy = iBuilder->getStreamTy();
    136139    mPabloCompiler->compile(iBuilder);
    137     mBuilder = nullptr;
     140    mSizeTy = nullptr;
     141    mStreamTy = nullptr;
    138142}
    139143
     
    152156Integer * PabloKernel::getInteger(const int64_t value) const {
    153157    return mSymbolTable->getInteger(value);
     158}
     159
     160llvm::IntegerType * PabloKernel::getInt1Ty() const {
     161    return IntegerType::getInt1Ty(getModule()->getContext());
    154162}
    155163
     
    168176, mSymbolTable(new SymbolGenerator(b->getContext(), mAllocator))
    169177, mEntryBlock(PabloBlock::Create(this))
    170 , mBuilder(nullptr)
    171 //, mSizeTy(b->getSizeTy())
    172 //, mSizeTy(b->getSizeTy())
    173 {
     178, mSizeTy(nullptr)
     179, mStreamTy(nullptr) {
    174180    prepareStreamSetNameMap();
    175181    for (const Binding & ss : mStreamSetInputs) {
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.h

    r5440 r5446  
    123123    Integer * getInteger(const int64_t value) const;
    124124
    125     kernel::KernelBuilder * getBuilder() {
    126         return mBuilder;
    127     }
    128 
    129125protected:
    130126
     
    136132
    137133    virtual void generatePabloMethod() = 0;
     134
     135    llvm::IntegerType * getSizeTy() const {
     136        assert (mSizeTy); return mSizeTy;
     137    }
     138
     139    llvm::VectorType * getStreamTy() const {
     140        assert (mStreamTy); return mStreamTy;
     141    }
     142
     143    llvm::IntegerType * getInt1Ty() const;
    138144
    139145private:
     
    157163    SymbolGenerator *               mSymbolTable;
    158164    PabloBlock *                    mEntryBlock;
    159 
    160     kernel::KernelBuilder *         mBuilder;
    161 
    162 //    llvm::IntegerType * const       mSizeTy;
    163 //    llvm::VectorType * const        mStreamSetTy;
    164 
     165    llvm::IntegerType *             mSizeTy;
     166    llvm::VectorType *              mStreamTy;
    165167    std::vector<Var *>              mInputs;
    166168    std::vector<Var *>              mOutputs;
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r5440 r5446  
    560560                items = iBuilder->getProducedItemCount(outputs[0].name);
    561561            } else {
    562                 items = iBuilder->getProcessedItemCount(outputs[0].name);
     562                items = iBuilder->getProcessedItemCount(inputs[0].name);
    563563            }
    564564            Value * fItems = iBuilder->CreateUIToFP(items, iBuilder->getDoubleTy());
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp

    r5444 r5446  
    3535using namespace parabix;
    3636
     37using Kernel = kernel::Kernel;
     38using KernelBuilder = kernel::KernelBuilder;
     39
     40#ifndef NDEBUG
     41#define IN_DEBUG_MODE true
     42#else
     43#define IN_DEBUG_MODE false
     44#endif
     45
    3746namespace codegen {
    3847
     
    7281bool EnableAsserts;
    7382bool EnableCycleCounter;
    74 #ifndef NDEBUG
    75 #define IN_DEBUG_MODE true
    76 #else
    77 #define IN_DEBUG_MODE false
    78 #endif
    7983
    8084static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
     
    147151    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
    148152    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
    149 
    150153    builder.setTargetOptions(opts);
    151     builder.setVerifyModules(false);
     154    builder.setVerifyModules(IN_DEBUG_MODE || codegen::DebugOptionIsSet(codegen::VerifyIR));
    152155    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
    153156    switch (codegen::OptLevel) {
     
    159162    }
    160163    builder.setOptLevel(optLevel);
    161 
    162164    setAllFeatures(builder);
    163165    mEngine = builder.create();
     
    178180    mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple());
    179181
    180     iBuilder.reset(IDISA::GetIDISA_Builder(mMainModule));
     182    iBuilder.reset(IDISA::GetIDISA_Builder(*mContext, mMainModule->getTargetTriple()));
    181183    iBuilder->setDriver(this);
    182184    iBuilder->setModule(mMainModule);
     
    194196}
    195197
    196 kernel::Kernel * ParabixDriver::addKernelInstance(std::unique_ptr<kernel::Kernel> kb) {
     198Kernel * ParabixDriver::addKernelInstance(std::unique_ptr<Kernel> kb) {
    197199    mOwnedKernels.emplace_back(std::move(kb));
    198200    return mOwnedKernels.back().get();
    199201}
    200202
    201 void ParabixDriver::addKernelCall(kernel::Kernel & kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
     203void ParabixDriver::addKernelCall(Kernel & kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
    202204    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
    203205    mPipeline.emplace_back(&kb);
    204     kb.createKernelStub(iBuilder, inputs, outputs);
    205 }
    206 
    207 void ParabixDriver::makeKernelCall(kernel::Kernel * kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
     206    kb.bindPorts(inputs, outputs);
     207    kb.makeModule(iBuilder);
     208}
     209
     210void ParabixDriver::makeKernelCall(Kernel * kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
    208211    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
    209212    mPipeline.emplace_back(kb);   
    210     kb->createKernelStub(iBuilder, inputs, outputs);
     213    kb->bindPorts(inputs, outputs);
     214    kb->makeModule(iBuilder);
    211215}
    212216
     
    256260
    257261void ParabixDriver::linkAndFinalize() {
    258     Module * module = nullptr;
     262
     263//    using WorkQueue = boost::lockfree::queue<Kernel *>;
     264
     265    legacy::PassManager PM;
     266    if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
     267        PM.add(createVerifierPass());
     268    }
     269    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
     270    PM.add(createReassociatePass());             //Reassociate expressions.
     271    PM.add(createGVNPass());                     //Eliminate common subexpressions.
     272    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
     273    PM.add(createCFGSimplificationPass());
     274
     275//    unsigned threadCount = std::thread::hardware_concurrency();
     276
     277    std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
     278    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
     279//        threadCount = 1; // If we're dumping IR, disable seperate compilation
     280        if (codegen::IROutputFilename.empty()) {
     281            IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
     282        } else {
     283            std::error_code error;
     284            IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
     285        }
     286        PM.add(createPrintModulePass(*IROutputStream));
     287    }
     288
     289    #ifndef USE_LLVM_3_6
     290    std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
     291    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
     292//        threadCount = 1; // If we're dumping ASM, disable seperate compilation
     293        if (codegen::ASMOutputFilename.empty()) {
     294            ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
     295        } else {
     296            std::error_code error;
     297            ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
     298        }
     299        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
     300            report_fatal_error("LLVM error: could not add emit assembly pass");
     301        }
     302    }
     303    #endif
     304
    259305    try {
    260306
    261         legacy::PassManager PM;
    262 #ifndef NDEBUG
    263         PM.add(createVerifierPass());
    264 #else
    265         if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
    266             PM.add(createVerifierPass());
    267         }
    268 #endif
    269         PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
    270         PM.add(createReassociatePass());             //Reassociate expressions.
    271         PM.add(createGVNPass());                     //Eliminate common subexpressions.
    272         PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    273         PM.add(createCFGSimplificationPass());
    274 
    275         std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
    276         if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
    277             if (codegen::IROutputFilename.empty()) {
    278                 IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    279             } else {
    280                 std::error_code error;
    281                 IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
    282             }
    283             PM.add(createPrintModulePass(*IROutputStream));
    284         }
    285 
    286         #ifndef USE_LLVM_3_6
    287         std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
    288         if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
    289             if (codegen::ASMOutputFilename.empty()) {
    290                 ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    291             } else {
    292                 std::error_code error;
    293                 ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
    294             }
    295             if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
    296                 report_fatal_error("LLVM error: could not add emit assembly pass");
    297             }
    298         }
    299         #endif
    300 
    301         for (kernel::Kernel * const kernel : mPipeline) {
     307//    if (threadCount > 1) {
     308
     309//        WorkQueue Q(mPipeline.size());
     310//        for (Kernel * kernel : mPipeline) {
     311//            Q.unsynchronized_push(kernel); assert (kernel);
     312//        }
     313
     314//        std::thread compilation_thread[threadCount - 1];
     315//        for (unsigned i = 0; i < (threadCount - 1); ++i) {
     316//            compilation_thread[i] = std::thread([&]{
     317
     318//                llvm::LLVMContext C;
     319//                std::unique_ptr<KernelBuilder> kb(IDISA::GetIDISA_Builder(C, mMainModule->getTargetTriple()));
     320//                kb->setDriver(this);
     321
     322//                Kernel * kernel = nullptr;
     323//                while (Q.pop(kernel)) {
     324//                    kb->setKernel(kernel);
     325//                    Module * module = kernel->getModule();
     326//                    bool uncachedObject = true;
     327//                    if (mCache && mCache->loadCachedObjectFile(kb, kernel)) {
     328//                        uncachedObject = false;
     329//                    }
     330//                    if (uncachedObject) {
     331//                        module->setTargetTriple(mMainModule->getTargetTriple());
     332//                        kernel->generateKernel(kb);
     333//                        // PM.run(*module);
     334//                        mEngine->generateCodeForModule(module);
     335//                    }
     336//                    // mEngine->addModule(std::unique_ptr<Module>(module));
     337//                }
     338//            });
     339//        }
     340
     341//        // PM.run(*mMainModule);
     342
     343//        Kernel * kernel = nullptr;
     344//        while (Q.pop(kernel)) {
     345//            iBuilder->setKernel(kernel);
     346//            Module * module = kernel->getModule();
     347//            bool uncachedObject = true;
     348//            if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
     349//                uncachedObject = false;
     350//            }
     351//            if (uncachedObject) {
     352//                module->setTargetTriple(mMainModule->getTargetTriple());
     353//                kernel->generateKernel(iBuilder);
     354//                // PM.run(*module);
     355//            }
     356//            mEngine->addModule(std::unique_ptr<Module>(module));
     357//            mEngine->generateCodeForModule(module);
     358//        }
     359
     360//        for (unsigned i = 0; i < (threadCount - 1); ++i) {
     361//            compilation_thread[i].join();
     362//        }
     363
     364//        iBuilder->setKernel(nullptr);
     365
     366//    } else { // single threaded
     367
     368        for (Kernel * const kernel : mPipeline) {
     369
    302370            iBuilder->setKernel(kernel);
    303             module = kernel->getModule();
     371            Module * module = kernel->getModule();
    304372            bool uncachedObject = true;
    305373            if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
     
    312380            }
    313381            mEngine->addModule(std::unique_ptr<Module>(module));
     382            mEngine->generateCodeForModule(module);
    314383        }
    315384
    316385        iBuilder->setKernel(nullptr);
    317         module = mMainModule;
    318         PM.run(*module);
    319 
    320         mEngine->finalizeObject();
    321 
    322     } catch (...) {
    323         module->dump();
    324         report_fatal_error("LLVM error: link or finalize.");
    325     }
    326 }
    327 
    328 const std::unique_ptr<kernel::KernelBuilder> & ParabixDriver::getBuilder() {
     386        PM.run(*mMainModule);
     387
     388//    }
     389
     390    mEngine->finalizeObject();
     391
     392    } catch (const std::exception & e) {
     393        report_fatal_error(e.what());
     394    }
     395
     396}
     397
     398const std::unique_ptr<KernelBuilder> & ParabixDriver::getBuilder() {
    329399    return iBuilder;
    330400}
     
    337407    delete mCache;
    338408}
    339 
    340 
    341 //void ParabixDriver::linkAndFinalize() {
    342 
    343 //    using KernelQueue = boost::lockfree::queue<kernel::KernelBuilder *>;
    344 
    345 //    legacy::PassManager PM;
    346 //    #ifndef NDEBUG
    347 //    PM.add(createVerifierPass());
    348 //    #endif
    349 //    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
    350 //    PM.add(createReassociatePass());             //Reassociate expressions.
    351 //    PM.add(createGVNPass());                     //Eliminate common subexpressions.
    352 //    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    353 //    PM.add(createCFGSimplificationPass());
    354 
    355 //    raw_fd_ostream * IROutputStream = nullptr;
    356 //    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
    357 //        if (codegen::IROutputFilename.empty()) {
    358 //            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
    359 //        } else {
    360 //            std::error_code error;
    361 //            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
    362 //        }
    363 //        PM.add(createPrintModulePass(*IROutputStream));
    364 //        codegen::Jobs = 1; // TODO: set Jobs to 1 for now; these should be updated to pipe to a temporary buffer when Jobs > 1
    365 //    }
    366 
    367 //    #ifndef USE_LLVM_3_6
    368 //    raw_fd_ostream * ASMOutputStream = nullptr;
    369 //    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
    370 //        if (codegen::ASMOutputFilename.empty()) {
    371 //            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
    372 //        } else {
    373 //            std::error_code error;
    374 //            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
    375 //        }
    376 //        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
    377 //            report_fatal_error("LLVM error: could not add emit assembly pass");
    378 //        }
    379 //        codegen::Jobs = 1; // TODO: set Jobs to 1 for now; these should be updated to pipe to a temporary buffer when Jobs > 1
    380 //    }
    381 //    #endif
    382 
    383 //    KernelQueue Q(mPipeline.size() + 1);
    384 //    for (kernel::KernelBuilder * kb : mPipeline) {
    385 //        assert (kb);
    386 //        Q.unsynchronized_push(kb);
    387 //    }
    388 
    389 //    std::thread compilation_thread[codegen::Jobs];
    390 //    for (int i = 0; i < codegen::Jobs; ++i) {
    391 //        compilation_thread[i] = std::thread([&]{
    392 //            kernel::KernelBuilder * kb = nullptr;
    393 //            Module * m = nullptr;
    394 //            try {
    395 //                while (Q.pop(kb)) {
    396 //                    m = kb->getModule();
    397 //                    bool uncachedObject = true;
    398 //                    if (mCache && mCache->loadCachedObjectFile(kb)) {
    399 //                        uncachedObject = false;
    400 //                    }
    401 //                    if (uncachedObject) {
    402 //                        Module * const cm = iBuilder->getModule();
    403 //                        iBuilder->setModule(m);
    404 //                        kb->generateKernel();
    405 //                        PM.run(*m);
    406 //                        iBuilder->setModule(cm);
    407 //                    }
    408 //                    mEngine->addModule(std::unique_ptr<Module>(m));
    409 //                }
    410 //            } catch (...) {
    411 //                // clear the queue
    412 //                while (Q.pop(kb));
    413 //                // dump the result the module to the console
    414 //                if (m) m->dump();
    415 //                throw;
    416 //            }
    417 //        });
    418 //    }
    419 
    420 //    PM.run(*mMainModule);
    421 //    for (int i = 0; i < codegen::Jobs; ++i) {
    422 //        compilation_thread[i].join();
    423 //    }
    424 //    mEngine->finalizeObject();
    425 
    426 //    delete IROutputStream;
    427 //    #ifndef USE_LLVM_3_6
    428 //    delete ASMOutputStream;
    429 //    #endif
    430 
    431 //}
    432 
    433 
    434 //            std::unique_ptr<IDISA::IDISA_Builder> idb(IDISA::GetIDISA_Builder(kb->getModule()));
    435 //            idb->setDriver(this);
    436 //            kb->setBuilder(idb.get());
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.h

    r5444 r5446  
    9494    llvm::Function * LinkFunction(llvm::Module * mod, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
    9595
     96    void compileSingleThread();
     97
     98    void compileMultiThread(const unsigned threadCount);
     99
     100
    96101private:
    97102    std::unique_ptr<llvm::LLVMContext>                      mContext;
Note: See TracChangeset for help on using the changeset viewer.