Ignore:
Timestamp:
May 10, 2017, 4:26:11 PM (2 years ago)
Author:
nmedfort
Message:

Large refactoring step. Removed IR generation code from Kernel (formerly KernelBuilder) and moved it into the new KernelBuilder class.

Location:
icGREP/icgrep-devel/icgrep
Files:
73 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5436 r5440  
    1818
    1919Value * CBuilder::CreateOpenCall(Value * filename, Value * oflag, Value * mode) {
    20     Function * openFn = mMod->getFunction("open");
     20    Module * const m = getModule();
     21    Function * openFn = m->getFunction("open");
    2122    if (openFn == nullptr) {
    2223        IntegerType * int32Ty = getInt32Ty();
    2324        PointerType * int8PtrTy = getInt8PtrTy();
    24         openFn = cast<Function>(mMod->getOrInsertFunction("open",
     25        openFn = cast<Function>(m->getOrInsertFunction("open",
    2526                                                         int32Ty, int8PtrTy, int32Ty, int32Ty, nullptr));
    2627    }
     
    3132Value * CBuilder::CreateWriteCall(Value * fileDescriptor, Value * buf, Value * nbyte) {
    3233    PointerType * voidPtrTy = getVoidPtrTy();
    33     Function * write = mMod->getFunction("write");
     34    Module * const m = getModule();
     35    Function * write = m->getFunction("write");
    3436    if (write == nullptr) {
    3537        IntegerType * sizeTy = getSizeTy();
    3638        IntegerType * int32Ty = getInt32Ty();
    37         write = cast<Function>(mMod->getOrInsertFunction("write",
     39        write = cast<Function>(m->getOrInsertFunction("write",
    3840                                                        AttributeSet().addAttribute(getContext(), 2U, Attribute::NoAlias),
    3941                                                        sizeTy, int32Ty, voidPtrTy, sizeTy, nullptr));
     
    4547Value * CBuilder::CreateReadCall(Value * fileDescriptor, Value * buf, Value * nbyte) {
    4648    PointerType * voidPtrTy = getVoidPtrTy();
    47     Function * readFn = mMod->getFunction("read");
     49    Module * const m = getModule();
     50    Function * readFn = m->getFunction("read");
    4851    if (readFn == nullptr) {
    4952        IntegerType * sizeTy = getSizeTy();
    5053        IntegerType * int32Ty = getInt32Ty();
    51         readFn = cast<Function>(mMod->getOrInsertFunction("read",
     54        readFn = cast<Function>(m->getOrInsertFunction("read",
    5255                                                         AttributeSet().addAttribute(getContext(), 2U, Attribute::NoAlias),
    5356                                                         sizeTy, int32Ty, voidPtrTy, sizeTy, nullptr));
     
    5861
    5962Value * CBuilder::CreateCloseCall(Value * fileDescriptor) {
    60     Function * closeFn = mMod->getFunction("close");
     63    Module * const m = getModule();
     64    Function * closeFn = m->getFunction("close");
    6165    if (closeFn == nullptr) {
    6266        IntegerType * int32Ty = getInt32Ty();
    6367        FunctionType * fty = FunctionType::get(int32Ty, {int32Ty}, true);
    64         closeFn = Function::Create(fty, Function::ExternalLinkage, "close", mMod);
     68        closeFn = Function::Create(fty, Function::ExternalLinkage, "close", m);
    6569    }
    6670    return CreateCall(closeFn, fileDescriptor);
     
    6973
    7074Value * CBuilder::CreateUnlinkCall(Value * path) {
    71     Function * unlinkFunc = mMod->getFunction("unlink");
     75    Module * const m = getModule();
     76    Function * unlinkFunc = m->getFunction("unlink");
    7277    if (unlinkFunc == nullptr) {
    7378        FunctionType * fty = FunctionType::get(getInt32Ty(), {getInt8PtrTy()}, false);
    74         unlinkFunc = Function::Create(fty, Function::ExternalLinkage, "unlink", mMod);
     79        unlinkFunc = Function::Create(fty, Function::ExternalLinkage, "unlink", m);
    7580        unlinkFunc->setCallingConv(CallingConv::C);
    7681    }
     
    7984
    8085Value * CBuilder::CreateMkstempCall(Value * ftemplate) {
    81     Function * mkstempFn = mMod->getFunction("mkstemp");
     86    Module * const m = getModule();
     87    Function * mkstempFn = m->getFunction("mkstemp");
    8288    if (mkstempFn == nullptr) {
    83         mkstempFn = cast<Function>(mMod->getOrInsertFunction("mkstemp", getInt32Ty(), getInt8PtrTy(), nullptr));
     89        mkstempFn = cast<Function>(m->getOrInsertFunction("mkstemp", getInt32Ty(), getInt8PtrTy(), nullptr));
    8490    }
    8591    return CreateCall(mkstempFn, ftemplate);
     
    8894
    8995Value * CBuilder::CreateStrlenCall(Value * str) {
    90     Function * strlenFn = mMod->getFunction("strlen");
     96    Module * const m = getModule();
     97    Function * strlenFn = m->getFunction("strlen");
    9198    if (strlenFn == nullptr) {
    92         strlenFn = cast<Function>(mMod->getOrInsertFunction("strlen", getSizeTy(), getInt8PtrTy(), nullptr));
     99        strlenFn = cast<Function>(m->getOrInsertFunction("strlen", getSizeTy(), getInt8PtrTy(), nullptr));
    93100    }
    94101    return CreateCall(strlenFn, str);
     
    97104
    98105Function * CBuilder::GetPrintf() {
    99     Function * printf = mMod->getFunction("printf");
     106    Module * const m = getModule();
     107    Function * printf = m->getFunction("printf");
    100108    if (printf == nullptr) {
    101109        FunctionType * fty = FunctionType::get(getInt32Ty(), {getInt8PtrTy()}, true);
    102         printf = Function::Create(fty, Function::ExternalLinkage, "printf", mMod);
     110        printf = Function::Create(fty, Function::ExternalLinkage, "printf", m);
    103111        printf->addAttribute(1, Attribute::NoAlias);
    104112    }
     
    107115
    108116Function * CBuilder::GetDprintf() {
    109     Function * dprintf = mMod->getFunction("dprintf");
     117    Module * const m = getModule();
     118    Function * dprintf = m->getFunction("dprintf");
    110119    if (dprintf == nullptr) {
    111120        FunctionType * fty = FunctionType::get(getInt32Ty(), {getInt32Ty(), getInt8PtrTy()}, true);
    112         dprintf = Function::Create(fty, Function::ExternalLinkage, "dprintf", mMod);
     121        dprintf = Function::Create(fty, Function::ExternalLinkage, "dprintf", m);
    113122    }
    114123    return dprintf;
     
    116125
    117126void CBuilder::CallPrintInt(const std::string & name, Value * const value) {
    118     Constant * printRegister = mMod->getFunction("PrintInt");
     127    Module * const m = getModule();
     128    Constant * printRegister = m->getFunction("PrintInt");
    119129    IntegerType * int64Ty = getInt64Ty();
    120130    if (LLVM_UNLIKELY(printRegister == nullptr)) {
    121131        FunctionType *FT = FunctionType::get(getVoidTy(), { getInt8PtrTy(), int64Ty }, false);
    122         Function * function = Function::Create(FT, Function::InternalLinkage, "PrintInt", mMod);
     132        Function * function = Function::Create(FT, Function::InternalLinkage, "PrintInt", m);
    123133        auto arg = function->arg_begin();
    124134        std::string out = "%-40s = %" PRIx64 "\n";
     
    149159
    150160void CBuilder::CallPrintIntToStderr(const std::string & name, Value * const value) {
    151     Constant * printRegister = mMod->getFunction("PrintIntToStderr");
     161    Module * const m = getModule();
     162    Constant * printRegister = m->getFunction("PrintIntToStderr");
    152163    if (LLVM_UNLIKELY(printRegister == nullptr)) {
    153164        FunctionType *FT = FunctionType::get(getVoidTy(), { PointerType::get(getInt8Ty(), 0), getSizeTy() }, false);
    154         Function * function = Function::Create(FT, Function::InternalLinkage, "PrintIntToStderr", mMod);
     165        Function * function = Function::Create(FT, Function::InternalLinkage, "PrintIntToStderr", m);
    155166        auto arg = function->arg_begin();
    156167        std::string out = "%-40s = %" PRIx64 "\n";
     
    182193
    183194void CBuilder::CallPrintMsgToStderr(const std::string & message) {
    184     Constant * printMsg = mMod->getFunction("PrintMsgToStderr");
     195    Module * const m = getModule();
     196    Constant * printMsg = m->getFunction("PrintMsgToStderr");
    185197    if (LLVM_UNLIKELY(printMsg == nullptr)) {
    186198        FunctionType *FT = FunctionType::get(getVoidTy(), { PointerType::get(getInt8Ty(), 0) }, false);
    187         Function * function = Function::Create(FT, Function::InternalLinkage, "PrintMsgToStderr", mMod);
     199        Function * function = Function::Create(FT, Function::InternalLinkage, "PrintMsgToStderr", m);
    188200        auto arg = function->arg_begin();
    189201        std::string out = "%s\n";
     
    219231    if (malloc == nullptr) {
    220232        FunctionType * fty = FunctionType::get(voidPtrTy, {intTy}, false);
    221         malloc = Function::Create(fty, Function::ExternalLinkage, "malloc", mMod);
     233        malloc = Function::Create(fty, Function::ExternalLinkage, "malloc", m);
    222234        malloc->setCallingConv(CallingConv::C);
    223235        malloc->setDoesNotAlias(0);
     
    236248        report_fatal_error("CreateAlignedMalloc: alignment must be a power of 2");
    237249    }
    238     DataLayout DL(mMod);
     250    Module * const m = getModule();
     251    DataLayout DL(m);
    239252    IntegerType * const intTy = getIntPtrTy(DL);
    240     Function * aligned_malloc = mMod->getFunction("aligned_malloc" + std::to_string(alignment));
     253    Function * aligned_malloc = m->getFunction("aligned_malloc" + std::to_string(alignment));
    241254    if (LLVM_UNLIKELY(aligned_malloc == nullptr)) {
    242255        const auto ip = saveIP();
    243256        PointerType * const voidPtrTy = getVoidPtrTy();
    244257        FunctionType * fty = FunctionType::get(voidPtrTy, {intTy}, false);
    245         aligned_malloc = Function::Create(fty, Function::InternalLinkage, "aligned_malloc" + std::to_string(alignment), mMod);
     258        aligned_malloc = Function::Create(fty, Function::InternalLinkage, "aligned_malloc" + std::to_string(alignment), m);
    246259        aligned_malloc->setCallingConv(CallingConv::C);
    247260        aligned_malloc->setDoesNotAlias(0);
     
    288301
    289302Value * CBuilder::CreateMMap(Value * const addr, Value * size, Value * const prot, Value * const flags, Value * const fd, Value * const offset) {
    290     Function * fMMap = mMod->getFunction("mmap");
     303    Module * const m = getModule();
     304    Function * fMMap = m->getFunction("mmap");
    291305    if (LLVM_UNLIKELY(fMMap == nullptr)) {
    292306        PointerType * const voidPtrTy = getVoidPtrTy();
     
    294308        IntegerType * const sizeTy = getSizeTy();
    295309        FunctionType * fty = FunctionType::get(voidPtrTy, {voidPtrTy, sizeTy, intTy, intTy, intTy, sizeTy}, false);
    296         fMMap = Function::Create(fty, Function::ExternalLinkage, "mmap", mMod);
     310        fMMap = Function::Create(fty, Function::ExternalLinkage, "mmap", m);
    297311    }
    298312    Value * ptr = CreateCall(fMMap, {addr, size, prot, flags, fd, offset});
     
    332346    Value * result = nullptr;
    333347    if (T.isOSLinux() || T.isOSDarwin()) {
     348        Module * const m = getModule();
    334349        IntegerType * const intTy = getInt32Ty();
    335350        IntegerType * const sizeTy = getSizeTy();
    336351        PointerType * const voidPtrTy = getVoidPtrTy();
    337         Function * MAdviseFunc = mMod->getFunction("madvise");
     352        Function * MAdviseFunc = m->getFunction("madvise");
    338353        if (LLVM_UNLIKELY(MAdviseFunc == nullptr)) {
    339354            FunctionType * fty = FunctionType::get(intTy, {voidPtrTy, sizeTy, intTy}, false);
    340             MAdviseFunc = Function::Create(fty, Function::ExternalLinkage, "madvise", mMod);
     355            MAdviseFunc = Function::Create(fty, Function::ExternalLinkage, "madvise", m);
    341356        }
    342357        addr = CreatePointerCast(addr, voidPtrTy);
     
    361376
    362377Value * CBuilder::CheckMMapSuccess(Value * const addr) {
    363     DataLayout DL(mMod);
     378    Module * const m = getModule();
     379    DataLayout DL(m);
    364380    IntegerType * const intTy = getIntPtrTy(DL);
    365381    return CreateICmpNE(CreatePtrToInt(addr, intTy), ConstantInt::getAllOnesValue(intTy)); // MAP_FAILED = -1
     
    374390    Value * ptr = nullptr;
    375391    if (T.isOSLinux()) {
    376         DataLayout DL(mMod);
     392        Module * const m = getModule();
     393        DataLayout DL(m);
    377394        PointerType * const voidPtrTy = getVoidPtrTy();
    378395        IntegerType * const sizeTy = getSizeTy();
    379396        IntegerType * const intTy = getIntPtrTy(DL);
    380         Function * fMRemap = mMod->getFunction("mremap");
     397        Function * fMRemap = m->getFunction("mremap");
    381398        if (LLVM_UNLIKELY(fMRemap == nullptr)) {
    382399            FunctionType * fty = FunctionType::get(voidPtrTy, {voidPtrTy, sizeTy, sizeTy, intTy}, false);
    383             fMRemap = Function::Create(fty, Function::ExternalLinkage, "mremap", mMod);
     400            fMRemap = Function::Create(fty, Function::ExternalLinkage, "mremap", m);
    384401        }
    385402        addr = CreatePointerCast(addr, voidPtrTy);
     
    402419    IntegerType * const sizeTy = getSizeTy();
    403420    PointerType * const voidPtrTy = getVoidPtrTy();
    404     Function * munmapFunc = mMod->getFunction("munmap");
     421    Module * const m = getModule();
     422    Function * munmapFunc = m->getFunction("munmap");
    405423    if (LLVM_UNLIKELY(munmapFunc == nullptr)) {
    406424        FunctionType * const fty = FunctionType::get(sizeTy, {voidPtrTy, sizeTy}, false);
    407         munmapFunc = Function::Create(fty, Function::ExternalLinkage, "munmap", mMod);
     425        munmapFunc = Function::Create(fty, Function::ExternalLinkage, "munmap", m);
    408426    }
    409427    len = CreateZExtOrTrunc(len, sizeTy);
     
    427445    if (free == nullptr) {
    428446        FunctionType * fty = FunctionType::get(getVoidTy(), {voidPtrTy}, false);
    429         free = Function::Create(fty, Function::ExternalLinkage, "free", mMod);
     447        Module * const m = getModule();
     448        free = Function::Create(fty, Function::ExternalLinkage, "free", m);
    430449        free->setCallingConv(CallingConv::C);
    431450    }
     
    464483
    465484Value * CBuilder::CreateRealloc(Value * ptr, Value * size) {
    466     DataLayout DL(getModule());
     485    Module * const m = getModule();
     486    DataLayout DL(m);
    467487    IntegerType * const intTy = getIntPtrTy(DL);
    468488    PointerType * type = cast<PointerType>(ptr->getType());
     
    474494        }
    475495    }
    476     Module * const m = getModule();
    477496    Function * realloc = m->getFunction("realloc");
    478497    if (realloc == nullptr) {
    479498        PointerType * const voidPtrTy = getVoidPtrTy();
    480         FunctionType * fty = FunctionType::get(voidPtrTy, {voidPtrTy, intTy}, false);
    481         realloc = Function::Create(fty, Function::ExternalLinkage, "realloc", mMod);
     499        FunctionType * fty = FunctionType::get(voidPtrTy, {voidPtrTy, intTy}, false);       
     500        realloc = Function::Create(fty, Function::ExternalLinkage, "realloc", m);
    482501        realloc->setCallingConv(CallingConv::C);
    483502        realloc->setDoesNotAlias(1);
     
    517536
    518537Value * CBuilder::CreateFOpenCall(Value * filename, Value * mode) {
    519     Function * fOpenFunc = mMod->getFunction("fopen");
     538    Module * const m = getModule();
     539    Function * fOpenFunc = m->getFunction("fopen");
    520540    if (fOpenFunc == nullptr) {
    521541        FunctionType * fty = FunctionType::get(getFILEptrTy(), {getInt8Ty()->getPointerTo(), getInt8Ty()->getPointerTo()}, false);
    522         fOpenFunc = Function::Create(fty, Function::ExternalLinkage, "fopen", mMod);
     542        fOpenFunc = Function::Create(fty, Function::ExternalLinkage, "fopen", m);
    523543        fOpenFunc->setCallingConv(CallingConv::C);
    524544    }
     
    527547
    528548Value * CBuilder::CreateFReadCall(Value * ptr, Value * size, Value * nitems, Value * stream) {
    529     Function * fReadFunc = mMod->getFunction("fread");
     549    Module * const m = getModule();
     550    Function * fReadFunc = m->getFunction("fread");
    530551    PointerType * const voidPtrTy = getVoidPtrTy();
    531552    if (fReadFunc == nullptr) {
    532553        IntegerType * const sizeTy = getSizeTy();
    533554        FunctionType * fty = FunctionType::get(sizeTy, {voidPtrTy, sizeTy, sizeTy, getFILEptrTy()}, false);
    534         fReadFunc = Function::Create(fty, Function::ExternalLinkage, "fread", mMod);
     555        fReadFunc = Function::Create(fty, Function::ExternalLinkage, "fread", m);
    535556        fReadFunc->setCallingConv(CallingConv::C);
    536557    }
     
    540561
    541562Value * CBuilder::CreateFWriteCall(Value * ptr, Value * size, Value * nitems, Value * stream) {
    542     Function * fWriteFunc = mMod->getFunction("fwrite");
     563    Module * const m = getModule();
     564    Function * fWriteFunc = m->getFunction("fwrite");
    543565    PointerType * const voidPtrTy = getVoidPtrTy();
    544566    if (fWriteFunc == nullptr) {
    545567        IntegerType * const sizeTy = getSizeTy();
    546568        FunctionType * fty = FunctionType::get(sizeTy, {voidPtrTy, sizeTy, sizeTy, getFILEptrTy()}, false);
    547         fWriteFunc = Function::Create(fty, Function::ExternalLinkage, "fwrite", mMod);
     569        fWriteFunc = Function::Create(fty, Function::ExternalLinkage, "fwrite", m);
    548570        fWriteFunc->setCallingConv(CallingConv::C);
    549571    }
     
    553575
    554576Value * CBuilder::CreateFCloseCall(Value * stream) {
    555     Function * fCloseFunc = mMod->getFunction("fclose");
     577    Module * const m = getModule();
     578    Function * fCloseFunc = m->getFunction("fclose");
    556579    if (fCloseFunc == nullptr) {
    557580        FunctionType * fty = FunctionType::get(getInt32Ty(), {getFILEptrTy()}, false);
    558         fCloseFunc = Function::Create(fty, Function::ExternalLinkage, "fclose", mMod);
     581        fCloseFunc = Function::Create(fty, Function::ExternalLinkage, "fclose", m);
    559582        fCloseFunc->setCallingConv(CallingConv::C);
    560583    }
     
    563586
    564587Value * CBuilder::CreateRenameCall(Value * oldName, Value * newName) {
    565     Function * renameFunc = mMod->getFunction("rename");
     588    Module * const m = getModule();
     589    Function * renameFunc = m->getFunction("rename");
    566590    if (renameFunc == nullptr) {
    567591        FunctionType * fty = FunctionType::get(getInt32Ty(), {getInt8PtrTy(), getInt8PtrTy()}, false);
    568         renameFunc = Function::Create(fty, Function::ExternalLinkage, "rename", mMod);
     592        renameFunc = Function::Create(fty, Function::ExternalLinkage, "rename", m);
    569593        renameFunc->setCallingConv(CallingConv::C);
    570594    }
     
    573597
    574598Value * CBuilder::CreateRemoveCall(Value * path) {
    575     Function * removeFunc = mMod->getFunction("remove");
     599    Module * const m = getModule();
     600    Function * removeFunc = m->getFunction("remove");
    576601    if (removeFunc == nullptr) {
    577602        FunctionType * fty = FunctionType::get(getInt32Ty(), {getInt8PtrTy()}, false);
    578         removeFunc = Function::Create(fty, Function::ExternalLinkage, "remove", mMod);
     603        removeFunc = Function::Create(fty, Function::ExternalLinkage, "remove", m);
    579604        removeFunc->setCallingConv(CallingConv::C);
    580605    }
     
    583608
    584609Value * CBuilder::CreatePThreadCreateCall(Value * thread, Value * attr, Function * start_routine, Value * arg) {
     610    Module * const m = getModule();
    585611    Type * const voidPtrTy = getVoidPtrTy();
    586     Function * pthreadCreateFunc = mMod->getFunction("pthread_create");
     612    Function * pthreadCreateFunc = m->getFunction("pthread_create");
    587613    if (pthreadCreateFunc == nullptr) {
    588614        Type * pthreadTy = getSizeTy();
    589615        FunctionType * funVoidPtrVoidTy = FunctionType::get(getVoidTy(), {getVoidPtrTy()}, false);
    590616        FunctionType * fty = FunctionType::get(getInt32Ty(), {pthreadTy->getPointerTo(), voidPtrTy, funVoidPtrVoidTy->getPointerTo(), voidPtrTy}, false);
    591         pthreadCreateFunc = Function::Create(fty, Function::ExternalLinkage, "pthread_create", mMod);
     617        pthreadCreateFunc = Function::Create(fty, Function::ExternalLinkage, "pthread_create", m);
    592618        pthreadCreateFunc->setCallingConv(CallingConv::C);
    593619    }
     
    596622
    597623Value * CBuilder::CreatePThreadExitCall(Value * value_ptr) {
    598     Function * pthreadExitFunc = mMod->getFunction("pthread_exit");
     624    Module * const m = getModule();
     625    Function * pthreadExitFunc = m->getFunction("pthread_exit");
    599626    if (pthreadExitFunc == nullptr) {
    600627        FunctionType * fty = FunctionType::get(getVoidTy(), {getVoidPtrTy()}, false);
    601         pthreadExitFunc = Function::Create(fty, Function::ExternalLinkage, "pthread_exit", mMod);
     628        pthreadExitFunc = Function::Create(fty, Function::ExternalLinkage, "pthread_exit", m);
    602629        pthreadExitFunc->addFnAttr(Attribute::NoReturn);
    603630        pthreadExitFunc->setCallingConv(CallingConv::C);
     
    609636
    610637Value * CBuilder::CreatePThreadJoinCall(Value * thread, Value * value_ptr){
    611     Function * pthreadJoinFunc = mMod->getFunction("pthread_join");
     638    Module * const m = getModule();
     639    Function * pthreadJoinFunc = m->getFunction("pthread_join");
    612640    if (pthreadJoinFunc == nullptr) {
    613641        Type * pthreadTy = getSizeTy();
    614642        FunctionType * fty = FunctionType::get(getInt32Ty(), {pthreadTy, getVoidPtrTy()->getPointerTo()}, false);
    615         pthreadJoinFunc = Function::Create(fty, Function::ExternalLinkage, "pthread_join", mMod);
     643        pthreadJoinFunc = Function::Create(fty, Function::ExternalLinkage, "pthread_join", m);
    616644        pthreadJoinFunc->setCallingConv(CallingConv::C);
    617645    }
     
    619647}
    620648
    621 void CBuilder::CreateAssert(Value * const assertion, StringRef failureMessage) {
     649void CBuilder::CreateAssert(Value * const assertion, StringRef failureMessage) {   
    622650    if (codegen::EnableAsserts) {
    623         Function * function = mMod->getFunction("__assert");
     651        Module * const m = getModule();
     652        Function * function = m->getFunction("__assert");
    624653        if (LLVM_UNLIKELY(function == nullptr)) {
    625654            auto ip = saveIP();
    626655            FunctionType * fty = FunctionType::get(getVoidTy(), { getInt1Ty(), getInt8PtrTy(), getSizeTy() }, false);
    627             function = Function::Create(fty, Function::PrivateLinkage, "__assert", mMod);
     656            function = Function::Create(fty, Function::PrivateLinkage, "__assert", m);
    628657            function->setDoesNotThrow();
    629658            function->setDoesNotAlias(2);
     
    661690
    662691void CBuilder::CreateExit(const int exitCode) {
    663     Function * exit = mMod->getFunction("exit");
     692    Module * const m = getModule();
     693    Function * exit = m->getFunction("exit");
    664694    if (LLVM_UNLIKELY(exit == nullptr)) {
    665695        FunctionType * fty = FunctionType::get(getVoidTy(), {getInt32Ty()}, false);
    666         exit = Function::Create(fty, Function::ExternalLinkage, "exit", mMod);
     696        exit = Function::Create(fty, Function::ExternalLinkage, "exit", m);
    667697        exit->setDoesNotReturn();
    668698        exit->setDoesNotThrow();
    669699    }
    670700    CreateCall(exit, getInt32(exitCode));
     701}
     702
     703llvm::BasicBlock * CBuilder::CreateBasicBlock(std::string && name) {
     704    return BasicBlock::Create(getContext(), name, GetInsertBlock()->getParent());
    671705}
    672706
     
    679713}
    680714
    681 inline static unsigned ceil_log2(const unsigned v) {
    682     assert ("log2(0) is undefined!" && v != 0);
    683     return 32 - __builtin_clz(v - 1);
    684 }
    685 
    686715Value * CBuilder::CreatePopcount(Value * bits) {
    687     Value * ctpopFunc = Intrinsic::getDeclaration(mMod, Intrinsic::ctpop, bits->getType());
     716    Value * ctpopFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, bits->getType());
    688717    return CreateCall(ctpopFunc, bits);
    689718}
    690719
    691720Value * CBuilder::CreateCountForwardZeroes(Value * value) {
    692     Value * cttzFunc = Intrinsic::getDeclaration(mMod, Intrinsic::cttz, value->getType());
     721    Value * cttzFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::cttz, value->getType());
    693722    return CreateCall(cttzFunc, {value, ConstantInt::getFalse(getContext())});
    694723}
    695724
    696725Value * CBuilder::CreateCountReverseZeroes(Value * value) {
    697     Value * ctlzFunc = Intrinsic::getDeclaration(mMod, Intrinsic::ctlz, value->getType());
     726    Value * ctlzFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::ctlz, value->getType());
    698727    return CreateCall(ctlzFunc, {value, ConstantInt::getFalse(getContext())});
    699728}
     
    728757
    729758Value * CBuilder::GetString(StringRef Str) {
    730     Value * ptr = mMod->getGlobalVariable(Str, true);
     759    Module * const m = getModule();
     760    Value * ptr = m->getGlobalVariable(Str, true);
    731761    if (ptr == nullptr) {
    732762        ptr = CreateGlobalString(Str, Str);
     
    737767
    738768Value * CBuilder::CreateReadCycleCounter() {
    739     Value * cycleCountFunc = Intrinsic::getDeclaration(mMod, Intrinsic::readcyclecounter);
     769    Module * const m = getModule();
     770    Value * cycleCountFunc = Intrinsic::getDeclaration(m, Intrinsic::readcyclecounter);
    740771    return CreateCall(cycleCountFunc, std::vector<Value *>({}));
    741772}
     
    743774Function * CBuilder::LinkFunction(llvm::StringRef name, FunctionType * type, void * functionPtr) const {
    744775    assert (mDriver);
    745     return mDriver->LinkFunction(mMod, name, type, functionPtr);
     776    return mDriver->LinkFunction(getModule(), name, type, functionPtr);
    746777}
    747778
    748779CBuilder::CBuilder(llvm::LLVMContext & C, const unsigned GeneralRegisterWidthInBits)
    749780: IRBuilder<>(C)
    750 , mMod(nullptr)
    751781, mCacheLineAlignment(64)
    752782, mSizeType(getIntNTy(GeneralRegisterWidthInBits))
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5436 r5440  
    1010#include <llvm/IR/Constants.h>
    1111#include <llvm/ADT/Triple.h>
     12#ifndef NDEBUG
     13#include <llvm/IR/Function.h>
     14#endif
    1215
    1316namespace kernels { class KernelBuilder; }
     
    3033
    3134    llvm::Module * getModule() const {
    32         return mMod;
    33     }
    34 
    35     void setModule(llvm::Module * const mod) {
    36         mMod = mod;
     35        #ifndef NDEBUG
     36        llvm::BasicBlock * const bb = GetInsertBlock();
     37        if (bb) {
     38            llvm::Function * const f = bb->getParent();
     39            assert ("CBuilder has an insert point that is not contained within a Function" && f);
     40            assert ("CBuilder module differs from insertion point module" && (mModule == f->getParent()));
     41        }
     42        #endif
     43        return mModule;
     44    }
     45
     46    void setModule(llvm::Module * module) {
     47        mModule = module;
     48        ClearInsertionPoint();
    3749    }
    3850
     
    173185    }
    174186
     187    llvm::BasicBlock * CreateBasicBlock(std::string && name);
     188
    175189    virtual bool supportsIndirectBr() const {
    176190        return true;
     
    210224
    211225protected:
    212     llvm::Module *                  mMod;
     226
     227    llvm::Module *                  mModule;
    213228    unsigned                        mCacheLineAlignment;
    214229    llvm::IntegerType *             mSizeType;
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5436 r5440  
    1717    if (mBitBlockWidth == 256) {
    1818        if (fw == 64) {
    19             Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_pd_256);
     19            Value * signmask_f64func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx_movmsk_pd_256);
    2020            Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
    2121            Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
    2222            return CreateCall(signmask_f64func, a_as_pd);
    2323        } else if (fw == 32) {
    24             Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
     24            Value * signmask_f32func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx_movmsk_ps_256);
    2525            Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
    2626            Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
     
    3838            Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
    3939            Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);
    40             Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
     40            Value * signmask_f32func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx_movmsk_ps_256);
    4141            return CreateCall(signmask_f32func, pack_as_ps);
    4242        }
     
    9696Value * IDISA_AVX2_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {
    9797    if ((fw == 128) && (mBitBlockWidth == 256)) {
    98         Value * vperm2i128func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx2_vperm2i128);
     98        Value * vperm2i128func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx2_vperm2i128);
    9999        return CreateCall(vperm2i128func, {fwCast(64, a), fwCast(64, b), getInt8(0x31)});
    100100    }
     
    105105Value * IDISA_AVX2_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) {
    106106    if ((fw == 128) && (mBitBlockWidth == 256)) {
    107         Value * vperm2i128func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx2_vperm2i128);
     107        Value * vperm2i128func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx2_vperm2i128);
    108108        return CreateCall(vperm2i128func, {fwCast(64, a), fwCast(64, b), getInt8(0x20)});
    109109    }
     
    114114Value * IDISA_AVX2_Builder::hsimd_packl_in_lanes(unsigned lanes, unsigned fw, Value * a, Value * b) {
    115115    if ((fw == 16)  && (lanes == 2)) {
    116         Value * vpackuswbfunc = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx2_packuswb);
     116        Value * vpackuswbfunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx2_packuswb);
    117117        Value * a_low = fwCast(16, simd_and(a, simd_lomask(fw)));
    118118        Value * b_low = fwCast(16, simd_and(b, simd_lomask(fw)));
     
    125125Value * IDISA_AVX2_Builder::hsimd_packh_in_lanes(unsigned lanes, unsigned fw, Value * a, Value * b) {
    126126    if ((fw == 16)  && (lanes == 2)) {
    127         Value * vpackuswbfunc = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx2_packuswb);
     127        Value * vpackuswbfunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx2_packuswb);
    128128        Value * a_low = simd_srli(fw, a, fw/2);
    129129        Value * b_low = simd_srli(fw, b, fw/2);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5436 r5440  
    3030
    3131void IDISA_Builder::CallPrintRegister(const std::string & name, Value * const value) {
    32     Constant * printRegister = mMod->getFunction("PrintRegister");
     32    Module * const m = getModule();
     33    Constant * printRegister = m->getFunction("PrintRegister");
    3334    if (LLVM_UNLIKELY(printRegister == nullptr)) {
    3435        FunctionType *FT = FunctionType::get(getVoidTy(), { PointerType::get(getInt8Ty(), 0), getBitBlockType() }, false);
    35         Function * function = Function::Create(FT, Function::InternalLinkage, "PrintRegister", mMod);
     36        Function * function = Function::Create(FT, Function::InternalLinkage, "PrintRegister", m);
    3637        auto arg = function->arg_begin();
    3738        std::string tmp;
     
    4243        }
    4344        out << '\n';
    44         BasicBlock * entry = BasicBlock::Create(mMod->getContext(), "entry", function);
     45        BasicBlock * entry = BasicBlock::Create(m->getContext(), "entry", function);
    4546        IRBuilder<> builder(entry);
    4647        std::vector<Value *> args;
     
    148149
    149150Value * IDISA_Builder::simd_cttz(unsigned fw, Value * a) {
    150     Value * cttzFunc = Intrinsic::getDeclaration(mMod, Intrinsic::cttz, fwVectorType(fw));
     151    Value * cttzFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::cttz, fwVectorType(fw));
    151152    return CreateCall(cttzFunc, {fwCast(fw, a), ConstantInt::get(getInt1Ty(), 0)});
    152153}
    153154
    154155Value * IDISA_Builder::simd_popcount(unsigned fw, Value * a) {
    155     Value * ctpopFunc = Intrinsic::getDeclaration(mMod, Intrinsic::ctpop, fwVectorType(fw));
     156    Value * ctpopFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::ctpop, fwVectorType(fw));
    156157    return CreateCall(ctpopFunc, fwCast(fw, a));
    157158}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp

    r5374 r5440  
    1919Value * IDISA_NVPTX20_Builder::bitblock_any(Value * val) {
    2020    Type * const int32ty = getInt32Ty();
    21     Function * barrierOrFunc = cast<Function>(mMod->getOrInsertFunction("llvm.nvvm.barrier0.or", int32ty, int32ty, nullptr));
     21    Function * barrierOrFunc = cast<Function>(getModule()->getOrInsertFunction("llvm.nvvm.barrier0.or", int32ty, int32ty, nullptr));
    2222    Value * nonZero_i1 = CreateICmpUGT(val, ConstantInt::getNullValue(mBitBlockType));
    2323    Value * nonZero_i32 = CreateZExt(CreateBitCast(nonZero_i1, getInt1Ty()), int32ty);
     
    6666
    6767void IDISA_NVPTX20_Builder::CreateGlobals(){
    68 
     68    Module * const m = getModule();
    6969    Type * const carryTy = ArrayType::get(mBitBlockType, groupThreads+1);
    70     carry = new GlobalVariable(*mMod,
     70    carry = new GlobalVariable(*m,
    7171        /*Type=*/carryTy,
    7272        /*isConstant=*/false,
     
    8181    Type * const bubbleTy = ArrayType::get(mBitBlockType, groupThreads);
    8282
    83     bubble = new GlobalVariable(*mMod,
     83    bubble = new GlobalVariable(*m,
    8484        /*Type=*/bubbleTy,
    8585        /*isConstant=*/false,
     
    102102    Type * const voidTy = getVoidTy();
    103103    Type * const int32ty = getInt32Ty();
    104     barrierFunc = cast<Function>(mMod->getOrInsertFunction("llvm.nvvm.barrier0", voidTy, nullptr));
    105     tidFunc = cast<Function>(mMod->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.tid.x", int32ty, nullptr));
    106 
     104    Module * const m = getModule();
     105    barrierFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.barrier0", voidTy, nullptr));
     106    tidFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.tid.x", int32ty, nullptr));
    107107}
    108108
    109109void IDISA_NVPTX20_Builder::CreateLongAdvanceFunc(){
    110   Type * const int32ty = getInt32Ty();
    111   Type * returnType = StructType::get(mMod->getContext(), {mBitBlockType, mBitBlockType});
    112 
    113   mLongAdvanceFunc = cast<Function>(mMod->getOrInsertFunction("LongAdvance", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
    114   mLongAdvanceFunc->setCallingConv(CallingConv::C);
    115   Function::arg_iterator args = mLongAdvanceFunc->arg_begin();
    116 
    117   Value * const id = &*(args++);
    118   id->setName("id");
    119   Value * const val = &*(args++);
    120   val->setName("val");
    121   Value * const shftAmount = &*(args++);
    122   shftAmount->setName("shftAmount");
    123   Value * const blockCarry = &*(args++);
    124   blockCarry->setName("blockCarry");
    125 
    126   SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mLongAdvanceFunc,0));
    127 
    128   Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
    129   CreateStore(blockCarry, firstCarryPtr);
    130 
    131   Value * adv0 = CreateShl(val, shftAmount);
    132   Value * nextid = CreateAdd(id, getInt32(1));
    133   Value * carryNextPtr = CreateGEP(carry, {getInt32(0), nextid});
    134   Value * lshr0 = CreateLShr(val, CreateSub(CreateBitCast(getInt64(64), mBitBlockType), shftAmount));
    135   CreateStore(lshr0, carryNextPtr);
    136 
    137   CreateCall(barrierFunc);
    138 
    139   Value * lastCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(groupThreads)});
    140   Value * blockCarryOut = CreateLoad(lastCarryPtr, "blockCarryOut");
    141 
    142   Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
    143   Value * carryVal = CreateLoad(carryPtr, "carryVal");
    144   Value * adv1 = CreateOr(adv0, carryVal);
    145 
    146  
    147   Value * retVal = UndefValue::get(returnType);
    148   retVal = CreateInsertValue(retVal, adv1, 0);
    149   retVal = CreateInsertValue(retVal, blockCarryOut, 1);
    150   CreateRet(retVal);
     110    Type * const int32ty = getInt32Ty();
     111    Module * const m = getModule();
     112    Type * returnType = StructType::get(m->getContext(), {mBitBlockType, mBitBlockType});
     113    mLongAdvanceFunc = cast<Function>(m->getOrInsertFunction("LongAdvance", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
     114    mLongAdvanceFunc->setCallingConv(CallingConv::C);
     115    auto args = mLongAdvanceFunc->arg_begin();
     116
     117    Value * const id = &*(args++);
     118    id->setName("id");
     119    Value * const val = &*(args++);
     120    val->setName("val");
     121    Value * const shftAmount = &*(args++);
     122    shftAmount->setName("shftAmount");
     123    Value * const blockCarry = &*(args++);
     124    blockCarry->setName("blockCarry");
     125
     126    SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", mLongAdvanceFunc,0));
     127
     128    Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
     129    CreateStore(blockCarry, firstCarryPtr);
     130
     131    Value * adv0 = CreateShl(val, shftAmount);
     132    Value * nextid = CreateAdd(id, getInt32(1));
     133    Value * carryNextPtr = CreateGEP(carry, {getInt32(0), nextid});
     134    Value * lshr0 = CreateLShr(val, CreateSub(CreateBitCast(getInt64(64), mBitBlockType), shftAmount));
     135    CreateStore(lshr0, carryNextPtr);
     136
     137    CreateCall(barrierFunc);
     138
     139    Value * lastCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(groupThreads)});
     140    Value * blockCarryOut = CreateLoad(lastCarryPtr, "blockCarryOut");
     141
     142    Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
     143    Value * carryVal = CreateLoad(carryPtr, "carryVal");
     144    Value * adv1 = CreateOr(adv0, carryVal);
     145
     146
     147    Value * retVal = UndefValue::get(returnType);
     148    retVal = CreateInsertValue(retVal, adv1, 0);
     149    retVal = CreateInsertValue(retVal, blockCarryOut, 1);
     150    CreateRet(retVal);
    151151
    152152}
     
    157157  Type * const int64ty = getInt64Ty();
    158158  Type * const int32ty = getInt32Ty();
    159   Type * returnType = StructType::get(mMod->getContext(), {mBitBlockType, mBitBlockType});
    160 
    161   mLongAddFunc = cast<Function>(mMod->getOrInsertFunction("LongAdd", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
     159  Module * const m = getModule();
     160
     161  Type * returnType = StructType::get(m->getContext(), {mBitBlockType, mBitBlockType});
     162
     163  mLongAddFunc = cast<Function>(m->getOrInsertFunction("LongAdd", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
    162164  mLongAddFunc->setCallingConv(CallingConv::C);
    163165  Function::arg_iterator args = mLongAddFunc->arg_begin();
     
    172174  blockCarry->setName("blockCarry");
    173175
    174   BasicBlock * entryBlock = BasicBlock::Create(mMod->getContext(), "entry", mLongAddFunc, 0);
    175   BasicBlock * bubbleCalculateBlock = BasicBlock::Create(mMod->getContext(), "bubbleCalculate", mLongAddFunc, 0);
    176   BasicBlock * bubbleSetBlock = BasicBlock::Create(mMod->getContext(), "bubbleSet", mLongAddFunc, 0);
     176  BasicBlock * entryBlock = BasicBlock::Create(m->getContext(), "entry", mLongAddFunc, 0);
     177  BasicBlock * bubbleCalculateBlock = BasicBlock::Create(m->getContext(), "bubbleCalculate", mLongAddFunc, 0);
     178  BasicBlock * bubbleSetBlock = BasicBlock::Create(m->getContext(), "bubbleSet", mLongAddFunc, 0);
    177179
    178180  SetInsertPoint(entryBlock);
     
    243245    Type * const int32ty = getInt32Ty();
    244246    Type * const int1ty = getInt1Ty();
    245     Function * const ballotFn = cast<Function>(mMod->getOrInsertFunction("ballot_nvptx", int32ty, int1ty, nullptr));
     247    Module * const m = getModule();
     248    Function * const ballotFn = cast<Function>(m->getOrInsertFunction("ballot_nvptx", int32ty, int1ty, nullptr));
    246249    ballotFn->setCallingConv(CallingConv::C);
    247250    Function::arg_iterator args = ballotFn->arg_begin();
     
    250253    input->setName("input");
    251254
    252     SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", ballotFn, 0));
     255    SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", ballotFn, 0));
    253256
    254257    Value * conv = CreateZExt(input, int32ty);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r5374 r5440  
    1212std::string IDISA_SSE2_Builder::getBuilderUniqueName() { return mBitBlockWidth != 128 ? "SSE2_" + std::to_string(mBitBlockWidth) : "SSE2";}
    1313
    14 Value * IDISA_SSE2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
     14Value * IDISA_SSE2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {   
    1515    if ((fw == 16) && (mBitBlockWidth == 128)) {
    16         Value * packuswb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_packuswb_128);
     16        Value * packuswb_func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_sse2_packuswb_128);
    1717        return CreateCall(packuswb_func, {simd_srli(16, a, 8), simd_srli(16, b, 8)});
    1818    }
     
    2323Value * IDISA_SSE2_Builder::hsimd_packl(unsigned fw, Value * a, Value * b) {
    2424    if ((fw == 16) && (mBitBlockWidth == 128)) {
    25         Value * packuswb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_packuswb_128);
     25        Value * packuswb_func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_sse2_packuswb_128);
    2626        Value * mask = simd_lomask(16);
    2727        return CreateCall(packuswb_func, {fwCast(16, simd_and(a, mask)), fwCast(16, simd_and(b, mask))});
     
    3535    if (mBitBlockWidth == 128) {
    3636        if (fw == 64) {
    37             Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_movmsk_pd);
     37            Value * signmask_f64func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_sse2_movmsk_pd);
    3838            Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
    3939            Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
     
    4141        }
    4242        if (fw == 8) {
    43             Value * pmovmskb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_pmovmskb_128);
     43            Value * pmovmskb_func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_sse2_pmovmskb_128);
    4444            return CreateCall(pmovmskb_func, fwCast(8, a));
    4545        }
     
    4747    const auto fieldCount = mBitBlockWidth / fw;
    4848    if ((fieldCount > 4) && (fieldCount <= 16)) {
    49         Value * pmovmskb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_pmovmskb_128);
     49        Value * pmovmskb_func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_sse2_pmovmskb_128);
    5050        int fieldBytes = fw / 8;
    5151        int hiByte = fieldBytes - 1;
     
    6767    // SSE special cases using Intrinsic::x86_sse_movmsk_ps (fw=32 only)
    6868    if (fw == 32) {
    69         Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse_movmsk_ps);
     69        Value * signmask_f32func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_sse_movmsk_ps);
    7070        Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
    7171        Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
     
    8383        Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
    8484        Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);
    85         Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse_movmsk_ps);
     85        Value * signmask_f32func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_sse_movmsk_ps);
    8686        Value * mask = CreateCall(signmask_f32func, pack_as_ps);
    8787        return mask;
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.h

    r5310 r5440  
    2424    using Vars = std::vector<pablo::Var *>;
    2525
    26 //    CC_Compiler(pablo::PabloKernel * kernel, const unsigned encodingBits = 8, const std::string prefix = "basis");
    27    
    2826    CC_Compiler(pablo::PabloKernel * kernel, pablo::Var * basisBitSet);
    2927   
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.cpp

    r5436 r5440  
    1212namespace kernel {
    1313
    14 void editdCPUKernel::bitblock_advance_ci_co(Value * val, unsigned shift, Value * stideCarryArr, unsigned carryIdx, std::vector<std::vector<Value *>> & adv, std::vector<std::vector<int>> & calculated, int i, int j) const {
     14void editdCPUKernel::bitblock_advance_ci_co(const std::unique_ptr<kernel::KernelBuilder> & idb,
     15                                            Value * val, unsigned shift, Value * stideCarryArr, unsigned carryIdx,
     16                                            std::vector<std::vector<Value *>> & adv,
     17                                            std::vector<std::vector<int>> & calculated, int i, int j) const {
    1518    if (calculated[i][j] == 0) {
    16         Value * ptr = iBuilder->CreateGEP(stideCarryArr, {iBuilder->getInt32(0), iBuilder->getInt32(carryIdx)});
    17         Value * ci = iBuilder->CreateLoad(ptr);
    18         std::pair<Value *, Value *> rslt = iBuilder->bitblock_advance(val, ci, shift);
    19         iBuilder->CreateStore(std::get<0>(rslt), ptr);
     19        Value * ptr = idb->CreateGEP(stideCarryArr, {idb->getInt32(0), idb->getInt32(carryIdx)});
     20        Value * ci = idb->CreateLoad(ptr);
     21        std::pair<Value *, Value *> rslt = idb->bitblock_advance(val, ci, shift);
     22        idb->CreateStore(std::get<0>(rslt), ptr);
    2023        adv[i][j] = std::get<1>(rslt);
    2124        calculated[i][j] = 1;
     
    2326}
    2427
    25 void editdCPUKernel::generateDoBlockMethod() {
    26     auto savePoint = iBuilder->saveIP();
     28void editdCPUKernel::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & idb) {
     29    auto savePoint = idb->saveIP();
    2730
    28     Type * const int32ty = iBuilder->getInt32Ty();
    29     Type * const int8ty = iBuilder->getInt8Ty();
     31    Type * const int32ty = idb->getInt32Ty();
     32    Type * const int8ty = idb->getInt8Ty();
    3033
    31     Value * pattStartPtr = getScalarField("pattStream");
    32     Value * strideCarryArr = getScalarField("strideCarry");
     34    Value * pattStartPtr = idb->getScalarField("pattStream");
     35    Value * strideCarryArr = idb->getScalarField("strideCarry");
    3336
    3437    unsigned carryIdx = 0;
     
    3740    std::vector<std::vector<Value *>> adv(mPatternLen, std::vector<Value *>(mEditDistance + 1));
    3841    std::vector<std::vector<int>> calculated(mPatternLen, std::vector<int>(mEditDistance + 1, 0));
    39     Value * pattPos = iBuilder->getInt32(0);
    40     Value * pattPtr = iBuilder->CreateGEP(pattStartPtr, pattPos);
    41     Value * pattCh = iBuilder->CreateLoad(pattPtr);
    42     Value * pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    43     Value * pattStream = loadInputStreamBlock("CCStream", iBuilder->CreateZExt(pattIdx, int32ty));
    44     pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     42    Value * pattPos = idb->getInt32(0);
     43    Value * pattPtr = idb->CreateGEP(pattStartPtr, pattPos);
     44    Value * pattCh = idb->CreateLoad(pattPtr);
     45    Value * pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
     46    Value * pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
     47    pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
    4548
    4649    e[0][0] = pattStream;
    4750    for(unsigned j = 1; j <= mEditDistance; j++){
    48       e[0][j] = iBuilder->allOnes();
     51      e[0][j] = idb->allOnes();
    4952    }
    5053
    5154    for(unsigned i = 1; i < mPatternLen; i++){
    52         pattPtr = iBuilder->CreateGEP(pattStartPtr, pattPos);
    53         pattCh = iBuilder->CreateLoad(pattPtr);
    54         pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    55         Value * pattStream = loadInputStreamBlock("CCStream", iBuilder->CreateZExt(pattIdx, int32ty));
     55        pattPtr = idb->CreateGEP(pattStartPtr, pattPos);
     56        pattCh = idb->CreateLoad(pattPtr);
     57        pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
     58        Value * pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
    5659
    57         bitblock_advance_ci_co(e[i-1][0], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, 0);
    58         e[i][0] = iBuilder->CreateAnd(adv[i-1][0], pattStream);
     60        bitblock_advance_ci_co(idb, e[i-1][0], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, 0);
     61        e[i][0] = idb->CreateAnd(adv[i-1][0], pattStream);
    5962        for(unsigned j = 1; j<= mEditDistance; j++){
    60             bitblock_advance_ci_co(e[i-1][j], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j);
    61             bitblock_advance_ci_co(e[i-1][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j-1);
    62             bitblock_advance_ci_co(e[i][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i, j-1);
    63             Value * tmp1 = iBuilder->CreateAnd(adv[i-1][j], pattStream);
    64             Value * tmp2 = iBuilder->CreateAnd(adv[i-1][j-1], iBuilder->CreateNot(pattStream));
    65             Value * tmp3 = iBuilder->CreateOr(adv[i][j-1], e[i-1][j-1]);
    66             e[i][j] = iBuilder->CreateOr(iBuilder->CreateOr(tmp1, tmp2), tmp3);
     63            bitblock_advance_ci_co(idb, e[i-1][j], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j);
     64            bitblock_advance_ci_co(idb, e[i-1][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j-1);
     65            bitblock_advance_ci_co(idb, e[i][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i, j-1);
     66            Value * tmp1 = idb->CreateAnd(adv[i-1][j], pattStream);
     67            Value * tmp2 = idb->CreateAnd(adv[i-1][j-1], idb->CreateNot(pattStream));
     68            Value * tmp3 = idb->CreateOr(adv[i][j-1], e[i-1][j-1]);
     69            e[i][j] = idb->CreateOr(idb->CreateOr(tmp1, tmp2), tmp3);
    6770
    6871        }
    69         pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     72        pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
    7073    }
    7174   
    72     storeOutputStreamBlock("ResultStream", iBuilder->getInt32(0), e[mPatternLen-1][0]);
     75    idb->storeOutputStreamBlock("ResultStream", idb->getInt32(0), e[mPatternLen-1][0]);
    7376    for(unsigned j = 1; j<= mEditDistance; j++){
    74         storeOutputStreamBlock("ResultStream", iBuilder->getInt32(j), iBuilder->CreateAnd(e[mPatternLen-1][j], iBuilder->CreateNot(e[mPatternLen-1][j-1])));
     77        idb->storeOutputStreamBlock("ResultStream", idb->getInt32(j), idb->CreateAnd(e[mPatternLen-1][j], idb->CreateNot(e[mPatternLen-1][j-1])));
    7578    }
    7679       
    77     iBuilder->CreateRetVoid();
    78     iBuilder->restoreIP(savePoint);
     80    idb->CreateRetVoid();
     81    idb->restoreIP(savePoint);
    7982}
    8083
    81 void editdCPUKernel::generateFinalBlockMethod(Value * remainingBytes) {
    82     setScalarField("EOFmask", iBuilder->bitblock_mask_from(remainingBytes));
    83     CreateDoBlockMethodCall();
     84void editdCPUKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, Value * remainingBytes) {
     85    idb->setScalarField("EOFmask", idb->bitblock_mask_from(remainingBytes));
     86    CreateDoBlockMethodCall(idb);
    8487}
    8588
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.h

    r5436 r5440  
    2121
    2222private:
    23     void generateDoBlockMethod() override;
    24     void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
    25     void bitblock_advance_ci_co(llvm::Value * val, unsigned shift, llvm::Value * stideCarryArr, unsigned carryIdx, std::vector<std::vector<llvm::Value *>> & adv, std::vector<std::vector<int>> & calculated, int i, int j) const;
     23    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & idb) override;
     24    void generateFinalBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & idb, llvm::Value * remainingBytes) override;
     25    void bitblock_advance_ci_co(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * val, unsigned shift, llvm::Value * stideCarryArr, unsigned carryIdx, std::vector<std::vector<llvm::Value *>> & adv, std::vector<std::vector<int>> & calculated, int i, int j) const;
    2626    unsigned mEditDistance;
    2727    unsigned mPatternLen;
  • icGREP/icgrep-devel/icgrep/editd/editd_gpu_kernel.cpp

    r5436 r5440  
    1111namespace kernel {
    1212
    13 void bitblock_advance_ci_co(IDISA::IDISA_Builder * const iBuilder, Value * val, unsigned shift, Value * stideCarryArr, unsigned carryIdx, std::vector<std::vector<Value *>> & adv, std::vector<std::vector<int>> & calculated, int i, int j){
     13void bitblock_advance_ci_co(const std::unique_ptr<KernelBuilder> & iBuilder, Value * val, unsigned shift, Value * stideCarryArr, unsigned carryIdx, std::vector<std::vector<Value *>> & adv, std::vector<std::vector<int>> & calculated, int i, int j){
    1414    if (!calculated[i][j]) {
    1515        Value * ptr = iBuilder->CreateGEP(stideCarryArr, {iBuilder->getInt32(0), iBuilder->getInt32(carryIdx)});
     
    2222}
    2323
    24 void editdGPUKernel::generateDoBlockMethod() {
     24void editdGPUKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) {
    2525
    26     IntegerType * const int32ty = iBuilder->getInt32Ty();
    27     IntegerType * const int8ty = iBuilder->getInt8Ty();
    28     Value * pattLen = iBuilder->getInt32(mPatternLen + 1);
    29     Value * pattPos = iBuilder->getInt32(0);
    30     Value * pattBuf = getScalarField("pattStream");
    31     Value * strideCarryArr = getScalarField("strideCarry");
     26    IntegerType * const int32ty = idb->getInt32Ty();
     27    IntegerType * const int8ty = idb->getInt8Ty();
     28    Value * pattLen = idb->getInt32(mPatternLen + 1);
     29    Value * pattPos = idb->getInt32(0);
     30    Value * pattBuf = idb->getScalarField("pattStream");
     31    Value * strideCarryArr = idb->getScalarField("strideCarry");
    3232   
    3333    unsigned carryIdx = 0;
     
    3737    std::vector<std::vector<int>> calculated(mPatternLen, std::vector<int>(mEditDistance + 1, 0));
    3838
    39     Module * m = iBuilder->getModule();
     39    Module * m = idb->getModule();
    4040    Function * bidFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32ty, nullptr));
    41     Value * bid = iBuilder->CreateCall(bidFunc);
    42     Value * pattStartPtr = iBuilder->CreateGEP(pattBuf, iBuilder->CreateMul(pattLen, bid));
    43     Value * pattPtr = iBuilder->CreateGEP(pattStartPtr, pattPos);
    44     Value * pattCh = iBuilder->CreateLoad(pattPtr);
    45     Value * pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    46     Value * pattStream = loadInputStreamBlock("CCStream", iBuilder->CreateZExt(pattIdx, int32ty));
    47     pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     41    Value * bid = idb->CreateCall(bidFunc);
     42    Value * pattStartPtr = idb->CreateGEP(pattBuf, idb->CreateMul(pattLen, bid));
     43    Value * pattPtr = idb->CreateGEP(pattStartPtr, pattPos);
     44    Value * pattCh = idb->CreateLoad(pattPtr);
     45    Value * pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
     46    Value * pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
     47    pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
    4848
    4949    e[0][0] = pattStream;
    5050    for(unsigned j = 1; j <= mEditDistance; j++){
    51         e[0][j] = iBuilder->allOnes();
     51        e[0][j] = idb->allOnes();
    5252    }
    5353    for(unsigned i = 1; i < mPatternLen; i++){
    54         pattPtr = iBuilder->CreateGEP(pattStartPtr, pattPos);
    55         pattCh = iBuilder->CreateLoad(pattPtr);
    56         pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
    57         pattStream = loadInputStreamBlock("CCStream", iBuilder->CreateZExt(pattIdx, int32ty));
    58         bitblock_advance_ci_co(iBuilder, e[i-1][0], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, 0);
    59         e[i][0] = iBuilder->CreateAnd(adv[i-1][0], pattStream);
     54        pattPtr = idb->CreateGEP(pattStartPtr, pattPos);
     55        pattCh = idb->CreateLoad(pattPtr);
     56        pattIdx = idb->CreateAnd(idb->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
     57        pattStream = idb->loadInputStreamBlock("CCStream", idb->CreateZExt(pattIdx, int32ty));
     58        bitblock_advance_ci_co(idb, e[i-1][0], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, 0);
     59        e[i][0] = idb->CreateAnd(adv[i-1][0], pattStream);
    6060        for(unsigned j = 1; j<= mEditDistance; j++){
    61             bitblock_advance_ci_co(iBuilder, e[i-1][j], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j);
    62             bitblock_advance_ci_co(iBuilder, e[i-1][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j-1);
    63             bitblock_advance_ci_co(iBuilder, e[i][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i, j-1);
    64             Value * tmp1 = iBuilder->CreateAnd(adv[i-1][j], pattStream);
    65             Value * tmp2 = iBuilder->CreateAnd(adv[i-1][j-1], iBuilder->CreateNot(pattStream));
    66             Value * tmp3 = iBuilder->CreateOr(adv[i][j-1], e[i-1][j-1]);
    67             e[i][j] = iBuilder->CreateOr(iBuilder->CreateOr(tmp1, tmp2), tmp3);
     61            bitblock_advance_ci_co(idb, e[i-1][j], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j);
     62            bitblock_advance_ci_co(idb, e[i-1][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i-1, j-1);
     63            bitblock_advance_ci_co(idb, e[i][j-1], 1, strideCarryArr, carryIdx++, adv, calculated, i, j-1);
     64            Value * tmp1 = idb->CreateAnd(adv[i-1][j], pattStream);
     65            Value * tmp2 = idb->CreateAnd(adv[i-1][j-1], idb->CreateNot(pattStream));
     66            Value * tmp3 = idb->CreateOr(adv[i][j-1], e[i-1][j-1]);
     67            e[i][j] = idb->CreateOr(idb->CreateOr(tmp1, tmp2), tmp3);
    6868        }
    69         pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
     69        pattPos = idb->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
    7070    }
    71     storeOutputStreamBlock("ResultStream", iBuilder->getInt32(0), e[mPatternLen-1][0]);
     71    idb->storeOutputStreamBlock("ResultStream", idb->getInt32(0), e[mPatternLen-1][0]);
    7272    for(unsigned j = 1; j<= mEditDistance; j++){
    73         storeOutputStreamBlock("ResultStream", iBuilder->getInt32(j), iBuilder->CreateAnd(e[mPatternLen - 1][j], iBuilder->CreateNot(e[mPatternLen - 1][j - 1])));
     73        idb->storeOutputStreamBlock("ResultStream", idb->getInt32(j), idb->CreateAnd(e[mPatternLen - 1][j], idb->CreateNot(e[mPatternLen - 1][j - 1])));
    7474    }
    7575}
    7676
    77 void editdGPUKernel::generateFinalBlockMethod(Value * remainingBytes) {
    78     setScalarField("EOFmask", iBuilder->bitblock_mask_from(remainingBytes));
    79     CreateDoBlockMethodCall();
     77void editdGPUKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, Value * remainingBytes) {
     78    idb->setScalarField("EOFmask", idb->bitblock_mask_from(remainingBytes));
     79    CreateDoBlockMethodCall(idb);
    8080}
    8181
  • icGREP/icgrep-devel/icgrep/editd/editd_gpu_kernel.h

    r5436 r5440  
    2121   
    2222private:
    23     void generateDoBlockMethod() override;
    24     void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
     23    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & idb) override;
     24    void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingBytes) override;
    2525    unsigned mEditDistance;
    2626    unsigned mPatternLen;
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.cpp

    r5436 r5440  
    1313namespace kernel {
    1414
    15 void editdScanKernel::generateDoBlockMethod() {
    16     auto savePoint = iBuilder->saveIP();
    17     Function * scanWordFunction = generateScanWordRoutine(iBuilder->getModule());
    18     iBuilder->restoreIP(savePoint);
     15void editdScanKernel::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & idb) {
     16    auto savePoint = idb->saveIP();
     17    Function * scanWordFunction = generateScanWordRoutine(idb);
     18    idb->restoreIP(savePoint);
    1919
    20     const unsigned fieldCount = iBuilder->getBitBlockWidth() / mScanwordBitWidth;
    21     Type * T = iBuilder->getIntNTy(mScanwordBitWidth);
     20    const unsigned fieldCount = idb->getBitBlockWidth() / mScanwordBitWidth;
     21    Type * T = idb->getIntNTy(mScanwordBitWidth);
    2222    VectorType * scanwordVectorType =  VectorType::get(T, fieldCount);
    23     Value * blockNo = getScalarField("BlockNo");
    24     Value * scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));
     23    Value * blockNo = idb->getScalarField("BlockNo");
     24    Value * scanwordPos = idb->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), idb->getBitBlockWidth()));
    2525   
    2626    std::vector<Value * > matchWordVectors;
    2727    for(unsigned d = 0; d <= mEditDistance; d++) {
    28         Value * matches = loadInputStreamBlock("matchResults", iBuilder->getInt32(d));
    29         matchWordVectors.push_back(iBuilder->CreateBitCast(matches, scanwordVectorType));
     28        Value * matches = idb->loadInputStreamBlock("matchResults", idb->getInt32(d));
     29        matchWordVectors.push_back(idb->CreateBitCast(matches, scanwordVectorType));
    3030    }
    3131   
    3232    for(unsigned i = 0; i < fieldCount; ++i) {
    3333        for(unsigned d = 0; d <= mEditDistance; d++) {
    34             Value * matchWord = iBuilder->CreateExtractElement(matchWordVectors[d], ConstantInt::get(T, i));
    35             iBuilder->CreateCall(scanWordFunction, {matchWord, iBuilder->getInt32(d), scanwordPos});
     34            Value * matchWord = idb->CreateExtractElement(matchWordVectors[d], ConstantInt::get(T, i));
     35            idb->CreateCall(scanWordFunction, {matchWord, idb->getInt32(d), scanwordPos});
    3636        }
    37         scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, mScanwordBitWidth));
     37        scanwordPos = idb->CreateAdd(scanwordPos, ConstantInt::get(T, mScanwordBitWidth));
    3838    }
    3939
    40     setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getSize(1)));
     40    idb->setScalarField("BlockNo", idb->CreateAdd(blockNo, idb->getSize(1)));
    4141}
    4242
    43 Function * editdScanKernel::generateScanWordRoutine(Module * m) const {
     43Function * editdScanKernel::generateScanWordRoutine(const std::unique_ptr<KernelBuilder> &iBuilder) const {
    4444
    4545    IntegerType * T = iBuilder->getIntNTy(mScanwordBitWidth);
     46    Module * const m = iBuilder->getModule();
    4647
    4748    Function * scanFunc = cast<Function>(m->getOrInsertFunction("scan_word", iBuilder->getVoidTy(), T, iBuilder->getInt32Ty(), T, nullptr));
     
    5758
    5859    Constant * matchProcessor = m->getOrInsertFunction("wrapped_report_pos", iBuilder->getVoidTy(), T, iBuilder->getInt32Ty(), nullptr);
    59 
    60     BasicBlock * entryBlock = BasicBlock::Create(m->getContext(), "entry", scanFunc, 0);
    61 
    62     BasicBlock * matchesCondBlock = BasicBlock::Create(m->getContext(), "matchesCond", scanFunc, 0);
    63     BasicBlock * matchesLoopBlock = BasicBlock::Create(m->getContext(), "matchesLoop", scanFunc, 0);
    64     BasicBlock * matchesDoneBlock = BasicBlock::Create(m->getContext(), "matchesDone", scanFunc, 0);
     60    BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", scanFunc, 0);
     61    BasicBlock * matchesCondBlock = BasicBlock::Create(iBuilder->getContext(), "matchesCond", scanFunc, 0);
     62    BasicBlock * matchesLoopBlock = BasicBlock::Create(iBuilder->getContext(), "matchesLoop", scanFunc, 0);
     63    BasicBlock * matchesDoneBlock = BasicBlock::Create(iBuilder->getContext(), "matchesDone", scanFunc, 0);
    6564
    6665    iBuilder->SetInsertPoint(entryBlock);
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.h

    r5436 r5440  
    1818       
    1919private:
    20     void generateDoBlockMethod() override;
    21     llvm::Function * generateScanWordRoutine(llvm::Module * m) const;
     20    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     21    llvm::Function * generateScanWordRoutine(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) const;
    2222       
    2323    unsigned mEditDistance;
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5436 r5440  
    292292        pxDriver.addKernelCall(matchCountK, {MergedResults}, {});
    293293        pxDriver.generatePipelineIR();
    294         Value * matchedLineCount = matchCountK.getScalarField("matchedLineCount");
     294        idb->setKernel(&matchCountK);
     295        Value * matchedLineCount = idb->getScalarField("matchedLineCount");
    295296        matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
    296297        idb->CreateRet(matchedLineCount);
  • icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp

    r5436 r5440  
    1111namespace kernel {
    1212
    13 inline void ap_p2s_step(IDISA::IDISA_Builder * const iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
     13inline void ap_p2s_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
    1414    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
    1515    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
     
    1818}
    1919
    20 inline void p2s(IDISA::IDISA_Builder * const iBuilder, Value * p[], Value * s[]) {
     20inline void p2s(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p[], Value * s[]) {
    2121    Value * bit00004444[2];
    2222    Value * bit22226666[2];
     
    3838}
    3939
    40 void PrintableBits::generateDoBlockMethod() {
     40void PrintableBits::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    4141    // Load current block
    42     Value * bitStrmVal = loadInputStreamBlock("bitStream", iBuilder->getInt32(0));
     42    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStream", iBuilder->getInt32(0));
    4343
    4444    Value * bits[8];
     
    8181   
    8282    for (unsigned j = 0; j < 8; ++j) {
    83         storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), iBuilder->bitCast(printableBytes[j]));
    84     }
    85 }
    86 
    87 void SelectStream::generateDoBlockMethod() {
     83        iBuilder->storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), iBuilder->bitCast(printableBytes[j]));
     84    }
     85}
     86
     87void SelectStream::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
    8888    if (mStreamIndex >= mSizeInputStreamSet)
    8989        llvm::report_fatal_error("Stream index out of bounds.\n");
    9090   
    91     Value * bitStrmVal = loadInputStreamBlock("bitStreams", iBuilder->getInt32(mStreamIndex));
    92 
    93     storeOutputStreamBlock("bitStream", iBuilder->getInt32(0), bitStrmVal);
    94 }
    95 
    96 void PrintStreamSet::generateDoBlockMethod() {
     91    Value * bitStrmVal = iBuilder->loadInputStreamBlock("bitStreams", iBuilder->getInt32(mStreamIndex));
     92
     93    iBuilder->storeOutputStreamBlock("bitStream", iBuilder->getInt32(0), bitStrmVal);
     94}
     95
     96void PrintStreamSet::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
    9797
    9898    /*
     
    122122        BasicBlock * entry = iBuilder->GetInsertBlock();
    123123
    124         Value * count = getInputStreamSetCount(name);
     124        Value * count = iBuilder->getInputStreamSetCount(name);
    125125        ConstantInt * const streamLength = iBuilder->getSize(iBuilder->getBitBlockWidth() + mNameWidth + 1);
    126126        Value * output = iBuilder->CreateAlloca(iBuilder->getInt8Ty(), streamLength);
     
    135135
    136136            // Load current block
    137             Value * const input = loadInputStreamBlock(name, iBuilder->getInt32(0));
     137            Value * const input = iBuilder->loadInputStreamBlock(name, iBuilder->getInt32(0));
    138138
    139139            Value * bits[8];
     
    164164            iBuilder->CreateStore(iBuilder->getInt8('['), iBuilder->CreateGEP(output, length));
    165165
    166             BasicBlock * cond = CreateBasicBlock("cond");
    167 
    168             BasicBlock * getIntLength = CreateBasicBlock("getIntLength");
    169 
    170             BasicBlock * writeInt = CreateBasicBlock("writeInt");
    171             BasicBlock * writeVector = CreateBasicBlock("writeVector");
    172 
    173             BasicBlock * exit = CreateBasicBlock("exit");
     166            BasicBlock * cond = iBuilder->CreateBasicBlock("cond");
     167
     168            BasicBlock * getIntLength = iBuilder->CreateBasicBlock("getIntLength");
     169
     170            BasicBlock * writeInt = iBuilder->CreateBasicBlock("writeInt");
     171            BasicBlock * writeVector = iBuilder->CreateBasicBlock("writeVector");
     172
     173            BasicBlock * exit = iBuilder->CreateBasicBlock("exit");
    174174
    175175            ConstantInt * TEN = iBuilder->getSize(10);
     
    217217
    218218            // Load current block
    219             Value * const input = loadInputStreamBlock(name, i);
     219            Value * const input = iBuilder->loadInputStreamBlock(name, i);
    220220
    221221            Value * bits[8];
  • icGREP/icgrep-devel/icgrep/kernels/alignedprint.h

    r5436 r5440  
    1212namespace kernel {
    1313
    14 class PrintableBits : public BlockOrientedKernel {
     14class PrintableBits final : public BlockOrientedKernel {
    1515public:
    1616    PrintableBits(const std::unique_ptr<kernel::KernelBuilder> & builder);
    17     virtual ~PrintableBits() {}
    1817private:
    19     void generateDoBlockMethod() override;
     18    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2019};
    2120
    22 class SelectStream : public BlockOrientedKernel {
     21class SelectStream final : public BlockOrientedKernel {
    2322public:
    2423    SelectStream(const std::unique_ptr<kernel::KernelBuilder> & builder, unsigned sizeInputStreamSet, unsigned streamIndex);
    25     virtual ~SelectStream() {}
    2624private:
    27     void generateDoBlockMethod() override;
     25    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2826    unsigned mSizeInputStreamSet;
    2927    unsigned mStreamIndex;
    3028};
    3129
    32 class PrintStreamSet : public BlockOrientedKernel {
     30class PrintStreamSet final : public BlockOrientedKernel {
    3331public:
    3432    PrintStreamSet(const std::unique_ptr<kernel::KernelBuilder> & builder, std::vector<std::string> && names, const unsigned minWidth = 16);
    35     virtual ~PrintStreamSet() {}
    3633private:
    37     void generateDoBlockMethod() override;
     34    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    3835private:
    3936    const std::vector<std::string> mNames;
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.cpp

    r5436 r5440  
    2727}
    2828
    29 void DirectCharacterClassKernelBuilder::generateDoBlockMethod() {
     29void DirectCharacterClassKernelBuilder::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    3030    unsigned packCount = 8 * mCodeUnitSize; 
    3131    unsigned codeUnitWidth = 8 * mCodeUnitSize;
    3232    Value * codeUnitPack[packCount];
    3333    for (unsigned i = 0; i < packCount; i++) {
    34         codeUnitPack[i] = loadInputStreamPack("codeUnitStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
     34        codeUnitPack[i] = iBuilder->loadInputStreamPack("codeUnitStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
    3535    }
    3636    for (unsigned j = 0; j < mCharClasses.size();  j++) {
     
    6666            theCCstream = iBuilder->simd_or(theCCstream, pack);
    6767        }
    68         storeOutputStreamBlock("ccStream", iBuilder->getInt32(j), theCCstream);
     68        iBuilder->storeOutputStreamBlock("ccStream", iBuilder->getInt32(j), theCCstream);
    6969    }
    7070}
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.h

    r5436 r5440  
    1515class DirectCharacterClassKernelBuilder final : public BlockOrientedKernel {
    1616public:   
    17     DirectCharacterClassKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, std::string ccSetName, std::vector<re::CC *> charClasses, unsigned codeUnitSize);
    18     void generateDoBlockMethod() override;
     17    DirectCharacterClassKernelBuilder(const std::unique_ptr<KernelBuilder> & b, std::string ccSetName, std::vector<re::CC *> charClasses, unsigned codeUnitSize);
     18    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
    1919private:
    2020    const std::vector<re::CC *> mCharClasses;
     
    2525class ParabixCharacterClassKernelBuilder final : public pablo::PabloKernel {
    2626public:
    27     ParabixCharacterClassKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, std::string ccSetName, const std::vector<re::CC *> & charClasses, unsigned codeUnitSize);
     27    ParabixCharacterClassKernelBuilder(const std::unique_ptr<KernelBuilder> & b, std::string ccSetName, const std::vector<re::CC *> & charClasses, unsigned codeUnitSize);
    2828protected:
    2929    void generatePabloMethod() override;
  • icGREP/icgrep-devel/icgrep/kernels/cc_scan_kernel.cpp

    r5436 r5440  
    1414namespace kernel {
    1515
    16 void CCScanKernel::generateDoBlockMethod() {
     16void CCScanKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    1717    auto savePoint = iBuilder->saveIP();
    18     Function * scanWordFunction = generateScanWordRoutine(iBuilder->getModule());
     18    Function * scanWordFunction = generateScanWordRoutine(iBuilder);
    1919    iBuilder->restoreIP(savePoint);
    2020
     
    2222    Type * T = iBuilder->getIntNTy(mScanwordBitWidth);
    2323    VectorType * scanwordVectorType =  VectorType::get(T, fieldCount);   
    24     Value * blockNo = getScalarField("BlockNo");
     24    Value * blockNo = iBuilder->getScalarField("BlockNo");
    2525    Value * scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));
    2626   
    2727    std::vector<Value * > matchWordVectors;
    2828    for(unsigned d = 0; d < mStreamNum; d++) {
    29         Value * matches = loadInputStreamBlock("matchResults", iBuilder->getInt32(d));
     29        Value * matches = iBuilder->loadInputStreamBlock("matchResults", iBuilder->getInt32(d));
    3030        matchWordVectors.push_back(iBuilder->CreateBitCast(matches, scanwordVectorType));
    3131    }
     
    3838        scanwordPos = iBuilder->CreateAdd(scanwordPos, ConstantInt::get(T, mScanwordBitWidth));
    3939    }   
    40     setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getSize(1)));
     40    iBuilder->setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getSize(1)));
    4141}
    4242
    43 Function * CCScanKernel::generateScanWordRoutine(Module * m) const {
     43Function * CCScanKernel::generateScanWordRoutine(const std::unique_ptr<KernelBuilder> & iBuilder) const {
    4444
    4545    IntegerType * T = iBuilder->getIntNTy(mScanwordBitWidth);
     46
     47    Module * const m = iBuilder->getModule();
    4648
    4749    Function * scanFunc = cast<Function>(m->getOrInsertFunction("scan_word", iBuilder->getVoidTy(), T, iBuilder->getInt32Ty(), T, nullptr));
  • icGREP/icgrep-devel/icgrep/kernels/cc_scan_kernel.h

    r5436 r5440  
    1818       
    1919private:
    20     void generateDoBlockMethod() override;
    21     llvm::Function * generateScanWordRoutine(llvm::Module * m) const;
     20    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     21    llvm::Function * generateScanWordRoutine(const std::unique_ptr<KernelBuilder> & iBuilder) const;
    2222       
    2323    unsigned mStreamNum;
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5436 r5440  
    1212namespace kernel {
    1313
    14 inline std::vector<Value *> parallel_prefix_deletion_masks(IDISA::IDISA_Builder * const iBuilder, const unsigned fw, Value * del_mask) {
     14inline std::vector<Value *> parallel_prefix_deletion_masks(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * del_mask) {
    1515    Value * m = iBuilder->simd_not(del_mask);
    1616    Value * mk = iBuilder->simd_slli(fw, del_mask, 1);
     
    2929}
    3030
    31 inline Value * apply_parallel_prefix_deletion(IDISA::IDISA_Builder * const iBuilder, const unsigned fw, Value * del_mask, const std::vector<Value *> & mv, Value * strm) {
     31inline Value * apply_parallel_prefix_deletion(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * del_mask, const std::vector<Value *> & mv, Value * strm) {
    3232    Value * s = iBuilder->simd_and(strm, iBuilder->simd_not(del_mask));
    3333    for (unsigned i = 0; i < mv.size(); i++) {
     
    3939}
    4040
    41 inline Value * partial_sum_popcount(IDISA::IDISA_Builder * const iBuilder, const unsigned fw, Value * mask) {
     41inline Value * partial_sum_popcount(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * mask) {
    4242    Value * field = iBuilder->simd_popcount(fw, mask);
    4343    const auto count = iBuilder->getBitBlockWidth() / fw;
     
    5252// Outputs: the deleted streams, plus a partial sum popcount
    5353
    54 void DeletionKernel::generateDoBlockMethod() {
    55     Value * delMask = loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
     54void DeletionKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     55    Value * delMask = iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
    5656    const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
    5757    for (unsigned j = 0; j < mStreamCount; ++j) {
    58         Value * input = loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
     58        Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
    5959        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
    60         storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
     60        iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
    6161    }
    6262    Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
    63     storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
    64 }
    65 
    66 void DeletionKernel::generateFinalBlockMethod(Value * remainingBytes) {
     63    iBuilder->storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
     64}
     65
     66void DeletionKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * remainingBytes) {
    6767    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    6868    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
    6969    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
    70     Value * delMask = iBuilder->CreateOr(EOF_del, loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
     70    Value * delMask = iBuilder->CreateOr(EOF_del, iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
    7171    const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
    7272    for (unsigned j = 0; j < mStreamCount; ++j) {
    73         Value * input = loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
     73        Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
    7474        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
    75         storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
     75        iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
    7676    }
    7777    Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
    78     storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
     78    iBuilder->storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
    7979}
    8080
     
    9292const unsigned PEXT_width = 64;
    9393
    94 inline std::vector<Value *> get_PEXT_masks(IDISA::IDISA_Builder * const iBuilder, Value * del_mask) {
     94inline std::vector<Value *> get_PEXT_masks(const std::unique_ptr<KernelBuilder> & iBuilder, Value * del_mask) {
    9595    Value * m = iBuilder->fwCast(PEXT_width, iBuilder->simd_not(del_mask));
    9696    std::vector<Value *> masks;
     
    103103// Apply PEXT deletion to a collection of blocks and swizzle the result.
    104104// strms contains the blocks to process
    105 inline std::vector<Value *> apply_PEXT_deletion_with_swizzle(IDISA::IDISA_Builder * const iBuilder, const std::vector<Value *> & masks, std::vector<Value *> strms) {
     105inline std::vector<Value *> apply_PEXT_deletion_with_swizzle(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks, std::vector<Value *> strms) {
    106106    Value * PEXT_func = nullptr;
    107107    if (PEXT_width == 64) {
     
    146146}
    147147
    148 inline Value * apply_PEXT_deletion(IDISA::IDISA_Builder * const iBuilder, const std::vector<Value *> & masks, Value * strm) {
     148inline Value * apply_PEXT_deletion(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks, Value * strm) {
    149149    Value * PEXT_func = nullptr;
    150150    if (PEXT_width == 64) {
     
    168168// Outputs: swizzles containing the swizzled deleted streams, plus a partial sum popcount
    169169
    170 void DeleteByPEXTkernel::generateDoBlockMethod() {
    171     Value * delMask = loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
     170void DeleteByPEXTkernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     171    Value * delMask = iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
    172172    const auto masks = get_PEXT_masks(iBuilder, delMask);
    173     generateProcessingLoop(masks, delMask);
    174 }
    175 
    176 void DeleteByPEXTkernel::generateFinalBlockMethod(Value * remainingBytes) {
     173    generateProcessingLoop(iBuilder, masks, delMask);
     174}
     175
     176void DeleteByPEXTkernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder, Value * remainingBytes) {
    177177    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    178178    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
    179179    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
    180     Value * delMask = iBuilder->CreateOr(EOF_del, loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
     180    Value * delMask = iBuilder->CreateOr(EOF_del, iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
    181181    const auto masks = get_PEXT_masks(iBuilder, delMask);
    182     generateProcessingLoop(masks, delMask);
    183 }
    184 
    185 void DeleteByPEXTkernel::generateProcessingLoop(const std::vector<Value *> & masks, Value * delMask) {
    186     if (mShouldSwizzle)    
    187         generatePEXTAndSwizzleLoop(masks);
    188     else
    189         generatePEXTLoop(masks);   
    190    
     182    generateProcessingLoop(iBuilder, masks, delMask);
     183}
     184
     185void DeleteByPEXTkernel::generateProcessingLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks, Value * delMask) {
     186    if (mShouldSwizzle) {
     187        generatePEXTAndSwizzleLoop(iBuilder, masks);
     188    } else {
     189        generatePEXTLoop(iBuilder, masks);
     190    }
    191191    //Value * delCount = partial_sum_popcount(iBuilder, mDelCountFieldWidth, apply_PEXT_deletion(iBuilder, masks, iBuilder->simd_not(delMask)));
    192192    Value * delCount = iBuilder->simd_popcount(mDelCountFieldWidth, iBuilder->simd_not(delMask));
    193     storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
    194 }
    195 
    196 void DeleteByPEXTkernel::generatePEXTLoop(const std::vector<Value *> & masks) {
     193    iBuilder->storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
     194}
     195
     196void DeleteByPEXTkernel::generatePEXTLoop(const std::unique_ptr<KernelBuilder> &iBuilder, const std::vector<Value *> & masks) {
    197197    for (unsigned j = 0; j < mStreamCount; ++j) {
    198         Value * input = loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
     198        Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
    199199        Value * output = apply_PEXT_deletion(iBuilder, masks, input);
    200         storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
    201     }
    202 }
    203 
    204 void DeleteByPEXTkernel::generatePEXTAndSwizzleLoop(const std::vector<Value *> & masks) {
     200        iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
     201    }
     202}
     203
     204void DeleteByPEXTkernel::generatePEXTAndSwizzleLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks) {
    205205    // Group blocks together into input vector. Input should contain mStreamCount/mSwizzleFactor blocks (e.g. for U8U16 16/4=4)
    206206    // mStreamCount/mSwizzleFactor -> (mStreamCount + mSwizzleFactor - 1) / mSwizzleFactor
     
    210210        for (unsigned i = streamSelectionIndex; i < (streamSelectionIndex + mSwizzleFactor); ++i) {
    211211                // Check if i > mStreamCount. If it is, add null streams until we get mStreamCount/mSwizzleFactor streams in the input vector
    212             if ( i >= mStreamCount)
     212            if ( i >= mStreamCount) {
    213213                                input.push_back(iBuilder->allZeroes());
    214                         else
    215                         input.push_back(loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(i)));
     214            } else {
     215                input.push_back(iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(i)));
     216            }
    216217        }
    217218        std::vector<Value *> output = apply_PEXT_deletion_with_swizzle(iBuilder, masks, input);
    218219        for (unsigned i = 0; i < mSwizzleFactor; i++) {
    219              storeOutputStreamBlock(std::string(mOutputSwizzleNameBase) + std::to_string(j), iBuilder->getInt32(i), output[i]);
     220             iBuilder->storeOutputStreamBlock(std::string(mOutputSwizzleNameBase) + std::to_string(j), iBuilder->getInt32(i), output[i]);
    220221        }
    221222    }
     
    280281}
    281282   
    282 void SwizzledBitstreamCompressByCount::generateDoBlockMethod() {
     283void SwizzledBitstreamCompressByCount::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    283284       
    284     Value * countStreamPtr = iBuilder->CreateBitCast(getInputStreamBlockPtr("countsPerStride", iBuilder->getInt32(0)), iBuilder->getIntNTy(mFieldWidth)->getPointerTo());
     285    Value * countsPerStridePtr = iBuilder->getInputStreamBlockPtr("countsPerStride", iBuilder->getInt32(0));
     286    Value * countStreamPtr = iBuilder->CreatePointerCast(countsPerStridePtr, iBuilder->getIntNTy(mFieldWidth)->getPointerTo());
    285287   
    286288    // Output is written and committed to the output buffer one swizzle at a time.
     
    289291    Constant * outputIndexShift = iBuilder->getSize(std::log2(mFieldWidth));
    290292   
    291     Value * outputProduced = getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
     293    Value * outputProduced = iBuilder->getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
    292294    Value * producedOffset = iBuilder->CreateAnd(outputProduced, blockOffsetMask);
    293295    Value * outputIndex = iBuilder->CreateLShr(producedOffset, outputIndexShift);
    294296
    295297    // There may be pending data in the kernel state, for up to mFieldWidth-1 bits per stream.
    296     Value * pendingOffset = getScalarField("pendingOffset");
     298    Value * pendingOffset = iBuilder->getScalarField("pendingOffset");
    297299    // There is a separate vector of pending data for each swizzle group.
    298300    std::vector<Value *> pendingData;
    299301    std::vector<Value *> outputStreamPtr;
    300302    for (unsigned i = 0; i < mSwizzleSetCount; i++) {
    301         pendingData.push_back(getScalarField("pendingSwizzleData" + std::to_string(i)));
    302         outputStreamPtr.push_back(getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0)));
     303        pendingData.push_back(iBuilder->getScalarField("pendingSwizzleData" + std::to_string(i)));
     304        outputStreamPtr.push_back(iBuilder->getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0)));
    303305    }
    304306   
     
    313315        // according to the same newItemCount, pendingSpace, ...
    314316        for (unsigned j = 0; j < mSwizzleSetCount; j++) {
    315             Value * newItems = loadInputStreamBlock("inputSwizzle" + std::to_string(j), iBuilder->getInt32(i));
     317            Value * newItems = iBuilder->loadInputStreamBlock("inputSwizzle" + std::to_string(j), iBuilder->getInt32(i));
    316318            // Combine as many of the new items as possible into the pending group.
    317319            Value * combinedGroup = iBuilder->CreateOr(pendingData[j], iBuilder->CreateShl(newItems, iBuilder->simd_fill(mFieldWidth, pendingOffset)));
     
    327329        pendingOffset = iBuilder->CreateAnd(iBuilder->CreateAdd(newItemCount, pendingOffset), iBuilder->getSize(mFieldWidth-1));
    328330    }
    329     setScalarField("pendingOffset", pendingOffset);
     331    iBuilder->setScalarField("pendingOffset", pendingOffset);
    330332   
    331333    Value * newlyProduced = iBuilder->CreateSub(iBuilder->CreateShl(outputIndex, outputIndexShift), producedOffset);
    332334    Value * produced = iBuilder->CreateAdd(outputProduced, newlyProduced);
    333335    for (unsigned j = 0; j < mSwizzleSetCount; j++) {
    334         setScalarField("pendingSwizzleData" + std::to_string(j), pendingData[j]);
    335     }
    336     setProducedItemCount("outputSwizzle0", produced);
    337 }
    338 
    339 void SwizzledBitstreamCompressByCount::generateFinalBlockMethod(Value * remainingBytes) {
    340     CreateDoBlockMethodCall();
     336        iBuilder->setScalarField("pendingSwizzleData" + std::to_string(j), pendingData[j]);
     337    }
     338    iBuilder->setProducedItemCount("outputSwizzle0", produced);
     339}
     340
     341void SwizzledBitstreamCompressByCount::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * /* remainingBytes */) {
     342    CreateDoBlockMethodCall(iBuilder);
    341343    Constant * blockOffsetMask = iBuilder->getSize(iBuilder->getBitBlockWidth() - 1);
    342344    Constant * outputIndexShift = iBuilder->getSize(std::log2(mFieldWidth));
    343345   
    344     Value * outputProduced = getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
     346    Value * outputProduced = iBuilder->getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
    345347    Value * producedOffset = iBuilder->CreateAnd(outputProduced, blockOffsetMask);
    346348    Value * outputIndex = iBuilder->CreateLShr(producedOffset, outputIndexShift);
    347     Value * pendingOffset = getScalarField("pendingOffset");
     349    Value * pendingOffset = iBuilder->getScalarField("pendingOffset");
    348350
    349351    // Write the pending data.
    350352    for (unsigned i = 0; i < mSwizzleSetCount; i++) {
    351         Value * pendingData = getScalarField("pendingSwizzleData" + std::to_string(i));
    352         Value * outputStreamPtr = getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0));
     353        Value * pendingData = iBuilder->getScalarField("pendingSwizzleData" + std::to_string(i));
     354        Value * outputStreamPtr = iBuilder->getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0));
    353355        iBuilder->CreateBlockAlignedStore(pendingData, iBuilder->CreateGEP(outputStreamPtr, outputIndex));
    354356    }
    355     setProducedItemCount("outputSwizzle0", iBuilder->CreateAdd(pendingOffset, outputProduced));
    356 }
    357 }
     357    iBuilder->setProducedItemCount("outputSwizzle0", iBuilder->CreateAdd(pendingOffset, outputProduced));
     358}
     359}
  • icGREP/icgrep-devel/icgrep/kernels/deletion.h

    r5436 r5440  
    2929    bool moduleIDisSignature() const override { return true; }
    3030protected:
    31     void generateDoBlockMethod() override;
    32     void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
     31    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     32    void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * remainingBytes) override;
    3333private:
    3434    const unsigned mDeletionFieldWidth;
     
    4242    bool moduleIDisSignature() const override { return true; }
    4343protected:
    44     void generateDoBlockMethod() override;
    45     void generateFinalBlockMethod(llvm::Value * remainingBytes) override;
    46     void generatePEXTAndSwizzleLoop(const std::vector<llvm::Value *> & masks);
    47     void generatePEXTLoop(const std::vector<llvm::Value *> & masks);
    48     void generateProcessingLoop(const std::vector<llvm::Value *> & masks, llvm::Value * delMask);   
     44    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     45    void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * remainingBytes) override;
     46    void generatePEXTAndSwizzleLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<llvm::Value *> & masks);
     47    void generatePEXTLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<llvm::Value *> & masks);
     48    void generateProcessingLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<llvm::Value *> & masks, llvm::Value * delMask);
    4949private:
    5050    const unsigned mDelCountFieldWidth;
     
    6161    bool moduleIDisSignature() const override { return true; }
    6262protected:
    63     void generateDoBlockMethod() override;
    64     void generateFinalBlockMethod(llvm::Value * remainingBytes) override;   
     63    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     64    void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value * remainingBytes) override;
    6565private:
    6666    const unsigned mBitStreamCount;
  • icGREP/icgrep-devel/icgrep/kernels/evenodd.cpp

    r5436 r5440  
    1111namespace kernel {
    1212
    13 void EvenOddKernel::generateDoBlockMethod() {
     13void EvenOddKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    1414    Value * even = iBuilder->simd_fill(64, iBuilder->getInt64(0x5555555555555555));
    1515    Value * odd = iBuilder->bitCast(iBuilder->simd_fill(8, iBuilder->getInt8(0xAA)));
    16     storeOutputStreamBlock("even_odd", iBuilder->getInt32(0), even);
    17     storeOutputStreamBlock("even_odd", iBuilder->getInt32(1), odd);
     16    iBuilder->storeOutputStreamBlock("even_odd", iBuilder->getInt32(0), even);
     17    iBuilder->storeOutputStreamBlock("even_odd", iBuilder->getInt32(1), odd);
    1818}
    1919
  • icGREP/icgrep-devel/icgrep/kernels/evenodd.h

    r5436 r5440  
    1212namespace kernel {
    1313
    14 class EvenOddKernel : public BlockOrientedKernel {
     14class EvenOddKernel final : public BlockOrientedKernel {
    1515public:
    1616    EvenOddKernel(const std::unique_ptr<kernel::KernelBuilder> & builder);
    17     virtual ~EvenOddKernel() {}
    1817private:
    19     void generateDoBlockMethod() override;
    20 
     18    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2119};
    2220
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5436 r5440  
    4040}
    4141
    42 std::string ICgrepKernelBuilder::makeSignature() {
     42std::string ICgrepKernelBuilder::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
    4343    return mSignature;
    4444}
     
    4848}
    4949
    50 void InvertMatchesKernel::generateDoBlockMethod() {
    51     Value * input = loadInputStreamBlock("matchedLines", iBuilder->getInt32(0));
    52     Value * lbs = loadInputStreamBlock("lineBreaks", iBuilder->getInt32(0));
     50void InvertMatchesKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     51    Value * input = iBuilder->loadInputStreamBlock("matchedLines", iBuilder->getInt32(0));
     52    Value * lbs = iBuilder->loadInputStreamBlock("lineBreaks", iBuilder->getInt32(0));
    5353    Value * inverted = iBuilder->CreateXor(input, lbs);
    54     storeOutputStreamBlock("nonMatches", iBuilder->getInt32(0), inverted);
     54    iBuilder->storeOutputStreamBlock("nonMatches", iBuilder->getInt32(0), inverted);
    5555}
    5656
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h

    r5436 r5440  
    1515public:
    1616    ICgrepKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::RE * const re_ast);   
    17     std::string makeSignature() override;
     17    std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    1818    bool isCachable() const override { return true; }
    1919protected:
     
    2828    InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder);
    2929private:
    30     void generateDoBlockMethod() override;
     30    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    3131};
    3232
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5438 r5440  
    126126}
    127127
    128 void KernelInterface::addKernelDeclarations() {
     128void KernelInterface::addKernelDeclarations(const std::unique_ptr<kernel::KernelBuilder> & idb) {
    129129
    130130    if (mKernelStateType == nullptr) {
     
    132132    }
    133133
    134     Module * const module = iBuilder->getModule();
     134    Module * const module = idb->getModule();
    135135    PointerType * const selfType = mKernelStateType->getPointerTo();
    136     IntegerType * const sizeTy = iBuilder->getSizeTy();
     136    IntegerType * const sizeTy = idb->getSizeTy();
    137137    PointerType * const consumerTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
    138     Type * const voidTy = iBuilder->getVoidTy();
     138    Type * const voidTy = idb->getVoidTy();
    139139
    140140    // Create the initialization function prototype
     
    155155    }
    156156    for (auto binding : mStreamSetOutputs) {
    157         (args++)->setName(binding.name + "ConsumerLocks");
     157        (++args)->setName(binding.name + "ConsumerLocks");
    158158    }
    159159
    160160    // Create the doSegment function prototype.
    161     std::vector<Type *> params = {selfType, iBuilder->getInt1Ty()};
     161    std::vector<Type *> params = {selfType, idb->getInt1Ty()};
    162162    params.insert(params.end(), mStreamSetInputs.size(), sizeTy);
    163163
     
    177177    Type * resultType = nullptr;
    178178    if (mScalarOutputs.empty()) {
    179         resultType = iBuilder->getVoidTy();
     179        resultType = idb->getVoidTy();
    180180    } else {
    181181        const auto n = mScalarOutputs.size();
     
    187187            resultType = outputType[0];
    188188        } else {
    189             resultType = StructType::get(iBuilder->getContext(), ArrayRef<Type *>(outputType, n));
     189            resultType = StructType::get(idb->getContext(), ArrayRef<Type *>(outputType, n));
    190190        }
    191191    }
     
    198198    args->setName("self");
    199199
    200     linkExternalMethods();
     200    linkExternalMethods(idb);
    201201}
    202202
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5438 r5440  
    9292    virtual bool isCachable() const = 0;
    9393
    94     virtual std::string makeSignature() = 0;
     94    virtual std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> & idb) = 0;
    9595
    9696    const std::vector<Binding> & getStreamInputs() const {
     
    127127
    128128    // Add ExternalLinkage method declarations for the kernel to a given client module.
    129     void addKernelDeclarations();
    130 
    131     virtual void linkExternalMethods() = 0;
    132 
    133     virtual llvm::Value * createInstance() = 0;
    134 
    135     virtual void initializeInstance() = 0;
    136 
    137     virtual void finalizeInstance() = 0;
     129    void addKernelDeclarations(const std::unique_ptr<kernel::KernelBuilder> & idb);
     130
     131    virtual void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> & idb) = 0;
     132
     133    virtual llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) = 0;
     134
     135    virtual void initializeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) = 0;
     136
     137    virtual void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) = 0;
    138138
    139139    void setInitialArguments(std::vector<llvm::Value *> && args) {
     
    145145    }
    146146
     147    void setInstance(llvm::Value * const instance) {
     148        assert ("kernel instance cannot be null!" && instance);
     149        assert ("kernel instance must point to a valid kernel state type!" && (instance->getType()->getPointerElementType() == mKernelStateType));
     150        mKernelInstance = instance;
     151    }
     152
    147153    unsigned getLookAhead() const {
    148154        return mLookAheadPositions;
     
    153159    }
    154160
    155     kernel::KernelBuilder * getBuilder() const {
    156         return iBuilder;
    157     }
    158 
    159     void setBuilder(const std::unique_ptr<kernel::KernelBuilder> & builder) {
    160         iBuilder = builder.get();
    161     }
    162 
    163161protected:
    164 
    165     virtual llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const = 0;
    166 
    167     virtual void setProducedItemCount(const std::string & name, llvm::Value * value) const = 0;
    168 
    169     virtual llvm::Value * getProcessedItemCount(const std::string & name) const = 0;
    170 
    171     virtual void setProcessedItemCount(const std::string & name, llvm::Value * value) const = 0;
    172 
    173     virtual llvm::Value * getConsumedItemCount(const std::string & name) const = 0;
    174 
    175     virtual void setConsumedItemCount(const std::string & name, llvm::Value * value) const = 0;
    176 
    177     virtual llvm::Value * getTerminationSignal() const = 0;
    178 
    179     virtual void setTerminationSignal() const = 0;
    180162
    181163    llvm::Function * getInitFunction(llvm::Module * const module) const;
     
    191173                    std::vector<Binding> && scalar_outputs,
    192174                    std::vector<Binding> && internal_scalars)
    193     : iBuilder(nullptr)
     175    : mKernelInstance(nullptr)
    194176    , mModule(nullptr)
    195     , mKernelInstance(nullptr)
    196177    , mKernelStateType(nullptr)
    197178    , mLookAheadPositions(0)
     
    201182    , mScalarInputs(scalar_inputs)
    202183    , mScalarOutputs(scalar_outputs)
    203     , mInternalScalars(internal_scalars)
    204     {
     184    , mInternalScalars(internal_scalars) {
    205185
    206186    }
    207187   
    208     void setInstance(llvm::Value * const instance) {
    209         assert ("kernel instance cannot be null!" && instance);
    210         assert ("kernel instance must point to a valid kernel state type!" && (instance->getType()->getPointerElementType() == mKernelStateType));
    211         mKernelInstance = instance;
    212     }
    213 
    214188protected:
    215    
    216     kernel::KernelBuilder *                 iBuilder;
     189
     190    llvm::Value *                           mKernelInstance;
    217191    llvm::Module *                          mModule;
    218 
    219     llvm::Value *                           mKernelInstance;
    220192    llvm::StructType *                      mKernelStateType;
    221193    unsigned                                mLookAheadPositions;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5439 r5440  
    5757}
    5858
    59 // Get the value of a scalar field for the current instance.
    60 llvm::Value * Kernel::getScalarFieldPtr(llvm::Value * index) const {
    61     return iBuilder->CreateGEP(getInstance(), {iBuilder->getInt32(0), index});
    62 }
    63 
    64 llvm::Value * Kernel::getScalarFieldPtr(const std::string & fieldName) const {
    65     return getScalarFieldPtr(iBuilder->getInt32(getScalarIndex(fieldName)));
    66 }
    67 
    68 llvm::Value * Kernel::getScalarField(const std::string & fieldName) const {
    69     return iBuilder->CreateLoad(getScalarFieldPtr(fieldName), fieldName);
    70 }
    71 
    72 // Set the value of a scalar field for the current instance.
    73 void Kernel::setScalarField(const std::string & fieldName, llvm::Value * value) const {
    74     iBuilder->CreateStore(value, getScalarFieldPtr(fieldName));
    75 }
    76 
    7759void Kernel::prepareStreamSetNameMap() {
    7860    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     
    8466}
    8567   
    86 void Kernel::prepareKernel() {
    87     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
     68void Kernel::createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
     69    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
     70    assert ("IDISA Builder does not have a valid Module" && idb->getModule());
     71    std::stringstream cacheName;   
     72    cacheName << getName() << '_' << idb->getBuilderUniqueName();
     73    for (const StreamSetBuffer * b: inputs) {
     74        cacheName <<  ':' <<  b->getUniqueID();
     75    }
     76    for (const StreamSetBuffer * b: outputs) {
     77        cacheName <<  ':' <<  b->getUniqueID();
     78    }
     79    Module * const kernelModule = new Module(cacheName.str(), idb->getContext());
     80    createKernelStub(idb, inputs, outputs, kernelModule);
     81}
     82
     83void Kernel::createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, Module * const kernelModule) {
     84    assert (mModule == nullptr);
     85    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
     86    assert (mStreamSetInputBuffers.empty());
     87    assert (mStreamSetOutputBuffers.empty());
     88
     89    if (LLVM_UNLIKELY(mStreamSetInputs.size() != inputs.size())) {
     90        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
     91                           " input stream sets but was given "
     92                           + std::to_string(inputs.size()));
     93    }
     94
     95    for (unsigned i = 0; i < inputs.size(); ++i) {
     96        StreamSetBuffer * const buf = inputs[i];
     97        if (LLVM_UNLIKELY(buf == nullptr)) {
     98            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
     99                               + " cannot be null");
     100        }
     101        buf->addConsumer(this);
     102    }
     103
     104    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != outputs.size())) {
     105        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
     106                           + " output stream sets but was given "
     107                           + std::to_string(outputs.size()));
     108    }
     109
     110    for (unsigned i = 0; i < outputs.size(); ++i) {
     111        StreamSetBuffer * const buf = outputs[i];
     112        if (LLVM_UNLIKELY(buf == nullptr)) {
     113            report_fatal_error(getName() + ": output stream set " + std::to_string(i) + " cannot be null");
     114        }
     115        if (LLVM_LIKELY(buf->getProducer() == nullptr)) {
     116            buf->setProducer(this);
     117        } else {
     118            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
     119                               + " is already produced by kernel " + buf->getProducer()->getName());
     120        }
     121    }
     122
     123    mModule = kernelModule;
     124    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
     125    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
     126    prepareKernel(idb);
     127}
     128
     129void Kernel::prepareKernel(const std::unique_ptr<KernelBuilder> & idb) {
     130    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
    88131    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    89132        report_fatal_error("Cannot prepare kernel after kernel state finalized");
     
    103146        report_fatal_error(out.str());
    104147    }
    105     const auto blockSize = iBuilder->getBitBlockWidth();
     148    const auto blockSize = idb->getBitBlockWidth();
    106149    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    107150        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
     
    110153        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
    111154        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
    112             addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
    113         }       
    114     }
    115 
    116     IntegerType * const sizeTy = iBuilder->getSizeTy();
     155            addScalar(idb->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
     156        }
     157    }
     158
     159    IntegerType * const sizeTy = idb->getSizeTy();
    117160    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    118161        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
     
    140183
    141184    addScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
    142     addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
     185    addScalar(idb->getInt1Ty(), TERMINATION_SIGNAL);
    143186
    144187    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     
    146189    }
    147190
    148     mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
    149 }
    150 
    151 void Kernel::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    152     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
    153     assert ("IDISA Builder does not have a valid Module" && iBuilder->getModule());
    154     std::stringstream cacheName;   
    155     cacheName << getName() << '_' << iBuilder->getBuilderUniqueName();
    156     for (const StreamSetBuffer * b: inputs) {
    157         cacheName <<  ':' <<  b->getUniqueID();
    158     }
    159     for (const StreamSetBuffer * b: outputs) {
    160         cacheName <<  ':' <<  b->getUniqueID();
    161     }
    162     Module * const kernelModule = new Module(cacheName.str(), iBuilder->getContext());
    163     kernelModule->setTargetTriple(iBuilder->getModule()->getTargetTriple());
    164     createKernelStub(inputs, outputs, kernelModule);
    165 }
    166 
    167 void Kernel::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, Module * const kernelModule) {
    168     assert (mModule == nullptr);
    169     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
    170     assert (mStreamSetInputBuffers.empty());
    171     assert (mStreamSetOutputBuffers.empty());
    172 
    173     if (LLVM_UNLIKELY(mStreamSetInputs.size() != inputs.size())) {
    174         report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
    175                            " input stream sets but was given "
    176                            + std::to_string(inputs.size()));
    177     }
    178 
    179     for (unsigned i = 0; i < inputs.size(); ++i) {
    180         StreamSetBuffer * const buf = inputs[i];
    181         if (LLVM_UNLIKELY(buf == nullptr)) {
    182             report_fatal_error(getName() + ": input stream set " + std::to_string(i)
    183                                + " cannot be null");
    184         }
    185         buf->addConsumer(this);
    186     }
    187 
    188     if (LLVM_UNLIKELY(mStreamSetOutputs.size() != outputs.size())) {
    189         report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
    190                            + " output stream sets but was given "
    191                            + std::to_string(outputs.size()));
    192     }
    193 
    194     for (unsigned i = 0; i < outputs.size(); ++i) {
    195         StreamSetBuffer * const buf = outputs[i];
    196         if (LLVM_UNLIKELY(buf == nullptr)) {
    197             report_fatal_error(getName() + ": output stream set " + std::to_string(i) + " cannot be null");
    198         }
    199         if (LLVM_LIKELY(buf->getProducer() == nullptr)) {
    200             buf->setProducer(this);
    201         } else {
    202             report_fatal_error(getName() + ": output stream set " + std::to_string(i)
    203                                + " is already produced by kernel " + buf->getProducer()->getName());
    204         }
    205     }
    206 
    207     mModule = kernelModule;
    208 
    209     mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
    210     mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
    211 
    212     prepareKernel();
    213 }
    214 
     191    mKernelStateType = StructType::create(idb->getContext(), mKernelFields, getName());
     192}
    215193
    216194// Default kernel signature: generate the IR and emit as byte code.
    217 std::string Kernel::makeSignature() {
    218     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
     195std::string Kernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> & idb) {
     196    assert ("KernelBuilder does not have a valid IDISA Builder" && idb.get());
    219197    if (LLVM_LIKELY(moduleIDisSignature())) {
    220198        return getModule()->getModuleIdentifier();
    221199    } else {
    222         generateKernel();
     200        generateKernel(idb);
    223201        std::string signature;
    224202        raw_string_ostream OS(signature);
     
    228206}
    229207
    230 void Kernel::generateKernel() {
    231     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
     208void Kernel::generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb) {
     209    assert ("KernelBuilder does not have a valid IDISA Builder" && idb.get());
    232210    // If the module id cannot uniquely identify this kernel, "generateKernelSignature()" will have already
    233211    // generated the unoptimized IR.
    234212    if (!mIsGenerated) {
    235         auto ip = iBuilder->saveIP();
    236         auto saveInstance = getInstance();
    237         addKernelDeclarations();
    238         callGenerateInitializeMethod();
    239         callGenerateDoSegmentMethod();       
    240         callGenerateFinalizeMethod();
     213        const auto m = idb->getModule();
     214        const auto ip = idb->saveIP();
     215        const auto saveInstance = getInstance();
     216        idb->setModule(mModule);
     217        addKernelDeclarations(idb);
     218        callGenerateInitializeMethod(idb);
     219        callGenerateDoSegmentMethod(idb);
     220        callGenerateFinalizeMethod(idb);
    241221        setInstance(saveInstance);
    242         iBuilder->restoreIP(ip);
     222        idb->setModule(m);
     223        idb->restoreIP(ip);
    243224        mIsGenerated = true;
    244225    }
    245226}
    246227
    247 inline void Kernel::callGenerateInitializeMethod() {
    248     mCurrentMethod = getInitFunction(iBuilder->getModule());
    249     iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     228inline void Kernel::callGenerateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & idb) {
     229    mCurrentMethod = getInitFunction(idb->getModule());
     230    idb->SetInsertPoint(BasicBlock::Create(idb->getContext(), "entry", mCurrentMethod));
    250231    Function::arg_iterator args = mCurrentMethod->arg_begin();
    251232    setInstance(&*(args++));
    252     iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), getInstance());
     233    idb->CreateStore(ConstantAggregateZero::get(mKernelStateType), getInstance());
    253234    for (const auto & binding : mScalarInputs) {
    254         setScalarField(binding.name, &*(args++));
     235        idb->setScalarField(binding.name, &*(args++));
    255236    }
    256237    for (const auto & binding : mStreamSetOutputs) {
    257         setConsumerLock(binding.name, &*(args++));
    258     }
    259     generateInitializeMethod();
    260     iBuilder->CreateRetVoid();
    261 }
    262 
    263 inline void Kernel::callGenerateDoSegmentMethod() {
    264     mCurrentMethod = getDoSegmentFunction(iBuilder->getModule());
    265     BasicBlock * const entry = CreateBasicBlock(getName() + "_entry");
    266     iBuilder->SetInsertPoint(entry);
     238        idb->setConsumerLock(binding.name, &*(args++));
     239    }
     240    generateInitializeMethod(idb);
     241    idb->CreateRetVoid();
     242}
     243
     244inline void Kernel::callGenerateDoSegmentMethod(const std::unique_ptr<kernel::KernelBuilder> & idb) {
     245    mCurrentMethod = getDoSegmentFunction(idb->getModule());
     246    idb->SetInsertPoint(BasicBlock::Create(idb->getContext(), "entry", mCurrentMethod));
    267247    auto args = mCurrentMethod->arg_begin();
    268248    setInstance(&*(args++));
     
    273253        mAvailableItemCount[i] = &*(args++);
    274254    }
    275     generateDoSegmentMethod(); // must be overridden by the KernelBuilder subtype
     255    generateDoSegmentMethod(idb); // must be overridden by the KernelBuilder subtype
    276256    mIsFinal = nullptr;
    277257    mAvailableItemCount.clear();
    278     iBuilder->CreateRetVoid();
    279 }
    280 
    281 inline void Kernel::callGenerateFinalizeMethod() {
    282     mCurrentMethod = getTerminateFunction(iBuilder->getModule());
    283     iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     258    idb->CreateRetVoid();
     259}
     260
     261inline void Kernel::callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb) {
     262    mCurrentMethod = getTerminateFunction(idb->getModule());
     263    idb->SetInsertPoint(BasicBlock::Create(idb->getContext(), "entry", mCurrentMethod));
    284264    auto args = mCurrentMethod->arg_begin();
    285265    setInstance(&*(args++));
    286     generateFinalizeMethod(); // may be overridden by the KernelBuilder subtype
     266    generateFinalizeMethod(idb); // may be overridden by the KernelBuilder subtype
    287267    const auto n = mScalarOutputs.size();
    288268    if (n == 0) {
    289         iBuilder->CreateRetVoid();
     269        idb->CreateRetVoid();
    290270    } else {
    291271        Value * outputs[n];
    292272        for (unsigned i = 0; i < n; ++i) {
    293             outputs[i] = getScalarField(mScalarOutputs[i].name);
     273            outputs[i] = idb->getScalarField(mScalarOutputs[i].name);
    294274        }
    295275        if (n == 1) {
    296             iBuilder->CreateRet(outputs[0]);
     276            idb->CreateRet(outputs[0]);
    297277        } else {
    298             iBuilder->CreateAggregateRet(outputs, n);
     278            idb->CreateAggregateRet(outputs, n);
    299279        }
    300280    }
     
    302282
    303283unsigned Kernel::getScalarIndex(const std::string & name) const {
    304     assert ("getScalarIndex was given a null IDISA Builder" && iBuilder);
    305284    const auto f = mKernelMap.find(name);
    306285    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
     286        assert (false);
    307287        report_fatal_error(getName() + " does not contain scalar: " + name);
    308288    }
     
    310290}
    311291
    312 Value * Kernel::getProducedItemCount(const std::string & name, Value * doFinal) const {
    313     Port port; unsigned ssIdx;
    314     std::tie(port, ssIdx) = getStreamPort(name);
    315     assert (port == Port::Output);
    316     if (mStreamSetOutputs[ssIdx].rate.isExact()) {
    317         std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
    318         std::string principalField;
    319         if (refSet.empty()) {
    320             if (mStreamSetInputs.empty()) {
    321                 principalField = mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX;
    322             } else {
    323                 principalField = mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
    324             }
    325         } else {
    326             Port port; unsigned pfIndex;
    327             std::tie(port, pfIndex) = getStreamPort(refSet);
    328             if (port == Port::Input) {
    329                principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
    330             } else {
    331                principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
    332             }
    333         }
    334         Value * principalItemsProcessed = getScalarField(principalField);
    335         return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
    336     }
    337     return getScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX);
    338 }
    339 
    340 llvm::Value * Kernel::getAvailableItemCount(const std::string & name) const {
    341     for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    342         if (mStreamSetInputs[i].name == name) {
    343             return mAvailableItemCount[i];
    344         }
    345     }
    346     return nullptr;
    347 }
    348 
    349 Value * Kernel::getProcessedItemCount(const std::string & name) const {
    350     Port port; unsigned ssIdx;
    351     std::tie(port, ssIdx) = getStreamPort(name);
    352     assert (port == Port::Input);
    353     if (mStreamSetInputs[ssIdx].rate.isExact()) {
    354         std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
    355         if (refSet.empty()) {
    356             refSet = mStreamSetInputs[0].name;
    357         }
    358         Value * principalItemsProcessed = getScalarField(refSet + PROCESSED_ITEM_COUNT_SUFFIX);
    359         return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
    360     }
    361     return getScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX);
    362 }
    363 
    364 Value * Kernel::getConsumedItemCount(const std::string & name) const {
    365     return getScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX);
    366 }
    367 
    368 void Kernel::setProducedItemCount(const std::string & name, Value * value) const {
    369     setScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX, value);
    370 }
    371 
    372 void Kernel::setProcessedItemCount(const std::string & name, Value * value) const {
    373     setScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX, value);
    374 }
    375 
    376 void Kernel::setConsumedItemCount(const std::string & name, Value * value) const {
    377     setScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX, value);
    378 }
    379 
    380 Value * Kernel::getTerminationSignal() const {
    381     return getScalarField(TERMINATION_SIGNAL);
    382 }
    383 
    384 void Kernel::setTerminationSignal() const {
    385     setScalarField(TERMINATION_SIGNAL, iBuilder->getTrue());
    386 }
    387 
    388 LoadInst * Kernel::acquireLogicalSegmentNo() const {
    389     assert (iBuilder);
    390     return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR));
    391 }
    392 
    393 void Kernel::releaseLogicalSegmentNo(Value * nextSegNo) const {
    394     iBuilder->CreateAtomicStoreRelease(nextSegNo, getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR));
    395 }
    396 
    397 llvm::Value * Kernel::getLinearlyAccessibleItems(const std::string & name, llvm::Value * fromPosition) const {
    398     const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
    399     return buf->getLinearlyAccessibleItems(iBuilder, fromPosition);
    400 }
    401 
    402 llvm::Value * Kernel::getConsumerLock(const std::string & name) const {
    403     return getScalarField(name + CONSUMER_SUFFIX);
    404 }
    405 
    406 void Kernel::setConsumerLock(const std::string & name, llvm::Value * value) const {
    407     setScalarField(name + CONSUMER_SUFFIX, value);
    408 }
    409 
    410 inline Value * Kernel::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
    411     for (const Binding & b : bindings) {
    412         if (b.name == name) {
    413             const auto divisor = iBuilder->getBitBlockWidth();
    414             if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
    415                 return iBuilder->CreateLShr(itemCount, std::log2(divisor));
    416             } else {
    417                 return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
    418             }
    419         }
    420     }
    421     report_fatal_error("Error: no binding in " + getName() + " for " + name);
    422 }
    423 
    424 Value * Kernel::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
    425     Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
    426     const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
    427     return buf->getStreamBlockPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
    428 }
    429 
    430 Value * Kernel::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
    431     return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
    432 }
    433 
    434 Value * Kernel::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
    435     Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
    436     const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
    437     return buf->getStreamPackPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
    438 }
    439 
    440 Value * Kernel::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
    441     return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
    442 }
    443 
    444 llvm::Value * Kernel::getInputStreamSetCount(const std::string & name) const {
    445     return getInputStreamSetBuffer(name)->getStreamSetCount(iBuilder, getStreamSetBufferPtr(name));
    446 }
    447 
    448 llvm::Value * Kernel::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
    449     Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
    450     blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
    451     const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
    452     return buf->getStreamBlockPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
    453 }
    454 
    455 Value * Kernel::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
    456     Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
    457     const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
    458     return buf->getStreamBlockPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
    459 }
    460 
    461 void Kernel::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
    462     return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
    463 }
    464 
    465 Value * Kernel::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
    466     Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
    467     const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
    468     return buf->getStreamPackPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
    469 }
    470 
    471 void Kernel::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
    472     return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
    473 }
    474 
    475 llvm::Value * Kernel::getOutputStreamSetCount(const std::string & name) const {
    476     return getOutputStreamSetBuffer(name)->getStreamSetCount(iBuilder, getStreamSetBufferPtr(name));
    477 }
    478 
    479 Value * Kernel::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
    480     return getInputStreamSetBuffer(name)->getRawItemPointer(iBuilder, getStreamSetBufferPtr(name), streamIndex, absolutePosition);
    481 }
    482 
    483 Value * Kernel::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
    484     return getOutputStreamSetBuffer(name)->getRawItemPointer(iBuilder, getStreamSetBufferPtr(name), streamIndex, absolutePosition);
    485 }
    486 
    487 Value * Kernel::getBaseAddress(const std::string & name) const {
    488     return getAnyStreamSetBuffer(name)->getBaseAddress(iBuilder, getStreamSetBufferPtr(name));
    489 }
    490 
    491 void Kernel::setBaseAddress(const std::string & name, Value * const addr) const {
    492     return getAnyStreamSetBuffer(name)->setBaseAddress(iBuilder, getStreamSetBufferPtr(name), addr);
    493 }
    494 
    495 Value * Kernel::getBufferedSize(const std::string & name) const {
    496     return getAnyStreamSetBuffer(name)->getBufferedSize(iBuilder, getStreamSetBufferPtr(name));
    497 }
    498 
    499 void Kernel::setBufferedSize(const std::string & name, Value * size) const {
    500     unsigned index; Port port;
    501     std::tie(port, index) = getStreamPort(name);
    502     const StreamSetBuffer * buf = nullptr;
    503     if (port == Port::Input) {
    504         assert (index < mStreamSetInputBuffers.size());
    505         buf = mStreamSetInputBuffers[index];
    506     } else {
    507         assert (index < mStreamSetOutputBuffers.size());
    508         buf = mStreamSetOutputBuffers[index];
    509     }
    510     buf->setBufferedSize(iBuilder, getStreamSetBufferPtr(name), size);
    511 }
    512 
    513 BasicBlock * Kernel::CreateWaitForConsumers() const {
    514 
    515     const auto consumers = getStreamOutputs();
    516     BasicBlock * const entry = iBuilder->GetInsertBlock();
    517     if (consumers.empty()) {
    518         return entry;
    519     } else {
    520         Function * const parent = entry->getParent();
    521         IntegerType * const sizeTy = iBuilder->getSizeTy();
    522         ConstantInt * const zero = iBuilder->getInt32(0);
    523         ConstantInt * const one = iBuilder->getInt32(1);
    524         ConstantInt * const size0 = iBuilder->getSize(0);
    525 
    526         Value * const segNo = acquireLogicalSegmentNo();
    527         const auto n = consumers.size();
    528         BasicBlock * load[n + 1];
    529         BasicBlock * wait[n];
    530         for (unsigned i = 0; i < n; ++i) {
    531             load[i] = BasicBlock::Create(iBuilder->getContext(), consumers[i].name + "Load", parent);
    532             wait[i] = BasicBlock::Create(iBuilder->getContext(), consumers[i].name + "Wait", parent);
    533         }
    534         load[n] = BasicBlock::Create(iBuilder->getContext(), "Resume", parent);
    535         iBuilder->CreateBr(load[0]);
    536         for (unsigned i = 0; i < n; ++i) {
    537 
    538             iBuilder->SetInsertPoint(load[i]);
    539             Value * const outputConsumers = getConsumerLock(consumers[i].name);
    540 
    541             Value * const consumerCount = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, zero}));
    542             Value * const consumerPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, one}));
    543             Value * const noConsumers = iBuilder->CreateICmpEQ(consumerCount, size0);
    544             iBuilder->CreateUnlikelyCondBr(noConsumers, load[i + 1], wait[i]);
    545 
    546             iBuilder->SetInsertPoint(wait[i]);
    547             PHINode * const consumerPhi = iBuilder->CreatePHI(sizeTy, 2);
    548             consumerPhi->addIncoming(size0, load[i]);
    549 
    550             Value * const conSegPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(consumerPtr, consumerPhi));
    551             Value * const processedSegmentCount = iBuilder->CreateAtomicLoadAcquire(conSegPtr);
    552             Value * const ready = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
    553             assert (ready->getType() == iBuilder->getInt1Ty());
    554             Value * const nextConsumerIdx = iBuilder->CreateAdd(consumerPhi, iBuilder->CreateZExt(ready, sizeTy));
    555             consumerPhi->addIncoming(nextConsumerIdx, wait[i]);
    556             Value * const next = iBuilder->CreateICmpEQ(nextConsumerIdx, consumerCount);
    557             iBuilder->CreateCondBr(next, load[i + 1], wait[i]);
    558         }
    559 
    560         BasicBlock * const exit = load[n];
    561         iBuilder->SetInsertPoint(exit);
    562         return exit;
    563     }
    564 
    565 }
    566 
    567 Value * Kernel::getStreamSetBufferPtr(const std::string & name) const {
    568     return getScalarField(name + BUFFER_PTR_SUFFIX);
    569 }
    570 
    571 //Argument * Kernel::getParameter(Function * const f, const std::string & name) const {
    572 //    for (auto & arg : f->getArgumentList()) {
    573 //        if (arg.getName().equals(name)) {
    574 //            return &arg;
    575 //        }
    576 //    }
    577 //    report_fatal_error(getName() + " does not have parameter " + name);
    578 //}
    579 
    580 CallInst * Kernel::createDoSegmentCall(const std::vector<Value *> & args) const {
    581     Function * const doSegment = getDoSegmentFunction(iBuilder->getModule());
    582     assert (doSegment->getArgumentList().size() == args.size());
    583     return iBuilder->CreateCall(doSegment, args);
    584 }
    585 
    586 Value * Kernel::getAccumulator(const std::string & accumName) const {
    587     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
    588     if (LLVM_UNLIKELY(mOutputScalarResult == nullptr)) {
    589         report_fatal_error("Cannot get accumulator " + accumName + " until " + getName() + " has terminated.");
    590     }
    591     const auto n = mScalarOutputs.size();
    592     if (LLVM_UNLIKELY(n == 0)) {
    593         report_fatal_error(getName() + " has no output scalars.");
    594     } else {
    595         for (unsigned i = 0; i < n; ++i) {
    596             const Binding & b = mScalarOutputs[i];
    597             if (b.name == accumName) {
    598                 if (n == 1) {
    599                     return mOutputScalarResult;
    600                 } else {
    601                     return iBuilder->CreateExtractValue(mOutputScalarResult, {i});
    602                 }
    603             }
    604         }
    605         report_fatal_error(getName() + " has no output scalar named " + accumName);
    606     }
    607 }
    608 
    609 BasicBlock * Kernel::CreateBasicBlock(std::string && name) const {
    610     return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
    611 }
    612 
    613 Value * Kernel::createInstance() {
    614     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
     292Value * Kernel::createInstance(const std::unique_ptr<KernelBuilder> & idb) {
     293    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
    615294    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    616295        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
    617296    }
    618     setInstance(iBuilder->CreateCacheAlignedAlloca(mKernelStateType));
     297    setInstance(idb->CreateCacheAlignedAlloca(mKernelStateType));
    619298    return getInstance();
    620299}
    621300
    622 void Kernel::initializeInstance() {
    623     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
     301void Kernel::initializeInstance(const std::unique_ptr<KernelBuilder> & idb) {
     302    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
    624303    if (LLVM_UNLIKELY(getInstance() == nullptr)) {
    625304        report_fatal_error("Cannot initialize " + getName() + " before calling createInstance()");
     
    656335    }
    657336    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
    658     IntegerType * const sizeTy = iBuilder->getSizeTy();
     337    IntegerType * const sizeTy = idb->getSizeTy();
    659338    PointerType * const sizePtrTy = sizeTy->getPointerTo();
    660339    PointerType * const sizePtrPtrTy = sizePtrTy->getPointerTo();
     
    664343        const auto & consumers = output->getConsumers();
    665344        const auto n = consumers.size();
    666         AllocaInst * const outputConsumers = iBuilder->CreateAlloca(consumerTy);
    667         Value * const consumerSegNoArray = iBuilder->CreateAlloca(ArrayType::get(sizePtrTy, n));
     345        AllocaInst * const outputConsumers = idb->CreateAlloca(consumerTy);
     346        Value * const consumerSegNoArray = idb->CreateAlloca(ArrayType::get(sizePtrTy, n));
    668347        for (unsigned i = 0; i < n; ++i) {
    669348            Kernel * const consumer = consumers[i];
    670349            assert ("all instances must be created prior to initialization of any instance" && consumer->getInstance());
    671             Value * const segmentNoPtr = consumer->getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR);
    672             iBuilder->CreateStore(segmentNoPtr, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
    673         }
    674         Value * const consumerCountPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    675         iBuilder->CreateStore(iBuilder->getSize(n), consumerCountPtr);
    676         Value * const consumerSegNoArrayPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    677         iBuilder->CreateStore(iBuilder->CreatePointerCast(consumerSegNoArray, sizePtrPtrTy), consumerSegNoArrayPtr);
     350            idb->setKernel(consumer);
     351            Value * const segmentNoPtr = idb->getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR);
     352            idb->CreateStore(segmentNoPtr, idb->CreateGEP(consumerSegNoArray, { idb->getInt32(0), idb->getInt32(i) }));
     353        }
     354        idb->setKernel(this);
     355        Value * const consumerCountPtr = idb->CreateGEP(outputConsumers, {idb->getInt32(0), idb->getInt32(0)});
     356        idb->CreateStore(idb->getSize(n), consumerCountPtr);
     357        Value * const consumerSegNoArrayPtr = idb->CreateGEP(outputConsumers, {idb->getInt32(0), idb->getInt32(1)});
     358        idb->CreateStore(idb->CreatePointerCast(consumerSegNoArray, sizePtrPtrTy), consumerSegNoArrayPtr);
    678359        args.push_back(outputConsumers);
    679360    }
    680 
    681     iBuilder->CreateCall(getInitFunction(iBuilder->getModule()), args);
     361    idb->CreateCall(getInitFunction(idb->getModule()), args);
    682362}
    683363
     
    685365//  each block of the given number of blocksToDo, and then updates counts.
    686366
    687 void BlockOrientedKernel::generateDoSegmentMethod() {   
    688     BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
    689     BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
    690     mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
    691     BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
    692     BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
    693     BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
     367void BlockOrientedKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) {
     368    BasicBlock * const entryBlock = idb->GetInsertBlock();
     369    BasicBlock * const strideLoopCond = idb->CreateBasicBlock(getName() + "_strideLoopCond");
     370    mStrideLoopBody = idb->CreateBasicBlock(getName() + "_strideLoopBody");
     371    BasicBlock * const stridesDone = idb->CreateBasicBlock(getName() + "_stridesDone");
     372    BasicBlock * const doFinalBlock = idb->CreateBasicBlock(getName() + "_doFinalBlock");
     373    BasicBlock * const segmentDone = idb->CreateBasicBlock(getName() + "_segmentDone");
    694374
    695375    Value * baseTarget = nullptr;
    696     if (useIndirectBr()) {
    697         baseTarget = iBuilder->CreateSelect(mIsFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
    698     }
    699 
    700     ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
     376    if (idb->supportsIndirectBr()) {
     377        baseTarget = idb->CreateSelect(mIsFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
     378    }
     379
     380    ConstantInt * stride = idb->getSize(idb->getStride());
    701381    Value * availablePos = mAvailableItemCount[0];
    702     Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
    703     Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
    704     Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
    705 
    706     iBuilder->CreateBr(strideLoopCond);
    707 
    708     iBuilder->SetInsertPoint(strideLoopCond);
     382    Value * processed = idb->getProcessedItemCount(mStreamSetInputs[0].name);
     383    Value * itemsAvail = idb->CreateSub(availablePos, processed);
     384    Value * stridesToDo = idb->CreateUDiv(itemsAvail, stride);
     385
     386    idb->CreateBr(strideLoopCond);
     387
     388    idb->SetInsertPoint(strideLoopCond);
    709389
    710390    PHINode * branchTarget = nullptr;
    711     if (useIndirectBr()) {
    712         branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
     391    if (idb->supportsIndirectBr()) {
     392        branchTarget = idb->CreatePHI(baseTarget->getType(), 2, "branchTarget");
    713393        branchTarget->addIncoming(baseTarget, entryBlock);
    714394    }
    715395
    716     PHINode * const stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
     396    PHINode * const stridesRemaining = idb->CreatePHI(idb->getSizeTy(), 2, "stridesRemaining");
    717397    stridesRemaining->addIncoming(stridesToDo, entryBlock);
    718398    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
    719399    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
    720     Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
    721     iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
    722 
    723     iBuilder->SetInsertPoint(mStrideLoopBody);
    724 
    725     if (useIndirectBr()) {
    726         mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
     400    Value * notDone = idb->CreateICmpSGT(stridesRemaining, idb->getSize(0));
     401    idb->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
     402
     403    idb->SetInsertPoint(mStrideLoopBody);
     404
     405    if (idb->supportsIndirectBr()) {
     406        mStrideLoopTarget = idb->CreatePHI(baseTarget->getType(), 2, "strideTarget");
    727407        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
    728408    }
     
    730410    /// GENERATE DO BLOCK METHOD
    731411
    732     writeDoBlockMethod();
     412    writeDoBlockMethod(idb);
    733413
    734414    /// UPDATE PROCESSED COUNTS
    735415
    736     processed = getProcessedItemCount(mStreamSetInputs[0].name);
    737     Value * itemsDone = iBuilder->CreateAdd(processed, stride);
    738     setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
    739 
    740     stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
    741 
    742     BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
    743     if (useIndirectBr()) {
     416    processed = idb->getProcessedItemCount(mStreamSetInputs[0].name);
     417    Value * itemsDone = idb->CreateAdd(processed, stride);
     418    idb->setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
     419
     420    stridesRemaining->addIncoming(idb->CreateSub(stridesRemaining, idb->getSize(1)), idb->GetInsertBlock());
     421
     422    BasicBlock * bodyEnd = idb->GetInsertBlock();
     423    if (idb->supportsIndirectBr()) {
    744424        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
    745425    }
    746     iBuilder->CreateBr(strideLoopCond);
     426    idb->CreateBr(strideLoopCond);
    747427
    748428    stridesDone->moveAfter(bodyEnd);
    749429
    750     iBuilder->SetInsertPoint(stridesDone);
     430    idb->SetInsertPoint(stridesDone);
    751431
    752432    // Now conditionally perform the final block processing depending on the doFinal parameter.
    753     if (useIndirectBr()) {
    754         mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
     433    if (idb->supportsIndirectBr()) {
     434        mStrideLoopBranch = idb->CreateIndirectBr(branchTarget, 3);
    755435        mStrideLoopBranch->addDestination(doFinalBlock);
    756436        mStrideLoopBranch->addDestination(segmentDone);
    757437    } else {
    758         iBuilder->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, segmentDone);
     438        idb->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, segmentDone);
    759439    }
    760440
    761441    doFinalBlock->moveAfter(stridesDone);
    762442
    763     iBuilder->SetInsertPoint(doFinalBlock);
    764 
    765     Value * remainingItems = iBuilder->CreateSub(mAvailableItemCount[0], getProcessedItemCount(mStreamSetInputs[0].name));
    766     writeFinalBlockMethod(remainingItems);
     443    idb->SetInsertPoint(doFinalBlock);
     444
     445    Value * remainingItems = idb->CreateSub(mAvailableItemCount[0], idb->getProcessedItemCount(mStreamSetInputs[0].name));
     446
     447    writeFinalBlockMethod(idb, remainingItems);
    767448
    768449    itemsDone = mAvailableItemCount[0];
    769     setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
    770     setTerminationSignal();
    771     iBuilder->CreateBr(segmentDone);
    772 
    773     segmentDone->moveAfter(iBuilder->GetInsertBlock());
    774 
    775     iBuilder->SetInsertPoint(segmentDone);
     450    idb->setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
     451    idb->setTerminationSignal();
     452    idb->CreateBr(segmentDone);
     453
     454    segmentDone->moveAfter(idb->GetInsertBlock());
     455
     456    idb->SetInsertPoint(segmentDone);
    776457
    777458    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
    778     if (useIndirectBr()) {
    779         MDBuilder mdb(iBuilder->getContext());
     459    if (idb->supportsIndirectBr()) {
     460        MDBuilder mdb(idb->getContext());
    780461        const auto destinations = mStrideLoopBranch->getNumDestinations();
    781462        uint32_t weights[destinations];
     
    789470}
    790471
    791 inline void BlockOrientedKernel::writeDoBlockMethod() {
     472inline void BlockOrientedKernel::writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) {
    792473
    793474    Value * const self = getInstance();
    794475    Function * const cp = mCurrentMethod;
    795     auto ip = iBuilder->saveIP();
     476    auto ip = idb->saveIP();
    796477
    797478    /// Check if the do block method is called and create the function if necessary   
    798     if (!useIndirectBr()) {
    799         FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType()}, false);
    800         mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
     479    if (!idb->supportsIndirectBr()) {
     480        FunctionType * const type = FunctionType::get(idb->getVoidTy(), {self->getType()}, false);
     481        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + DO_BLOCK_SUFFIX, idb->getModule());
    801482        mCurrentMethod->setCallingConv(CallingConv::C);
    802483        mCurrentMethod->setDoesNotThrow();
     
    805486        args->setName("self");
    806487        setInstance(&*args);
    807         iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     488        idb->SetInsertPoint(idb->CreateBasicBlock("entry"));
    808489    }
    809490
     
    811492    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    812493        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    813             priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
    814         }
    815     }
    816 
    817     generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
    818 
     494            priorProduced.push_back(idb->getProducedItemCount(mStreamSetOutputs[i].name));
     495        }
     496    }
     497
     498    generateDoBlockMethod(idb); // must be implemented by the BlockOrientedKernelBuilder subtype
     499
     500    unsigned priorIdx = 0;
     501    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     502        Value * log2BlockSize = idb->getSize(std::log2(idb->getBitBlockWidth()));
     503        if (SwizzledCopybackBuffer * const cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
     504            BasicBlock * copyBack = idb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
     505            BasicBlock * done = idb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     506            Value * newlyProduced = idb->CreateSub(idb->getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
     507            Value * priorBlock = idb->CreateLShr(priorProduced[priorIdx], log2BlockSize);
     508            Value * priorOffset = idb->CreateAnd(priorProduced[priorIdx], idb->getSize(idb->getBitBlockWidth() - 1));
     509            Value * instance = idb->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
     510            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(idb.get(), priorBlock);
     511            Value * accessible = idb->CreateSub(idb->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
     512            Value * wraparound = idb->CreateICmpULT(accessible, newlyProduced);
     513            idb->CreateCondBr(wraparound, copyBack, done);
     514            idb->SetInsertPoint(copyBack);
     515            Value * copyItems = idb->CreateSub(newlyProduced, accessible);
     516            cb->createCopyBack(idb.get(), instance, copyItems);
     517            idb->CreateBr(done);
     518            idb->SetInsertPoint(done);
     519            priorIdx++;
     520        }
     521        if (CircularCopybackBuffer * const cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
     522            BasicBlock * copyBack = idb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
     523            BasicBlock * done = idb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     524            Value * instance = idb->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
     525            Value * newlyProduced = idb->CreateSub(idb->getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
     526            Value * accessible = cb->getLinearlyAccessibleItems(idb.get(), priorProduced[priorIdx]);
     527            Value * wraparound = idb->CreateICmpULT(accessible, newlyProduced);
     528            idb->CreateCondBr(wraparound, copyBack, done);
     529            idb->SetInsertPoint(copyBack);
     530            Value * copyItems = idb->CreateSub(newlyProduced, accessible);
     531            cb->createCopyBack(idb.get(), instance, copyItems);
     532            idb->CreateBr(done);
     533            idb->SetInsertPoint(done);
     534            priorIdx++;
     535        }
     536    }
     537
     538
     539    /// Call the do block method if necessary then restore the current function state to the do segement method
     540    if (!idb->supportsIndirectBr()) {
     541        idb->CreateRetVoid();
     542        mDoBlockMethod = mCurrentMethod;
     543        idb->restoreIP(ip);
     544        idb->CreateCall(mCurrentMethod, self);
     545        setInstance(self);
     546        mCurrentMethod = cp;
     547    }
     548
     549}
     550
     // Emits the final-block logic for this kernel through the builder `idb`.
     //
     // When the builder does not support indirect branches, the logic is generated into a
     // separate internal function named getName() + FINAL_BLOCK_SUFFIX that takes
     // (self, remainingItems), and a call to that function is emitted at the insertion point
     // that was active on entry. Otherwise the logic is generated directly at the current
     // insertion point.
     //
     // idb            - builder used to create every instruction and basic block here.
     // remainingItems - item count forwarded to generateFinalBlockMethod; inside the outlined
     //                  function it is rebound to that function's second argument.
     551inline void BlockOrientedKernel::writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, Value * remainingItems) {
     552
            // Snapshot everything that the outlined path overwrites so it can be restored below.
     553    Value * const self = getInstance();
     554    Function * const cp = mCurrentMethod;
     555    Value * const remainingItemCount = remainingItems;
     556    auto ip = idb->saveIP();
     557
     558    if (!idb->supportsIndirectBr()) {
                // Signature of the outlined function: void (typeof(self), size_t).
     559        FunctionType * const type = FunctionType::get(idb->getVoidTy(), {self->getType(), idb->getSizeTy()}, false);
     560        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + FINAL_BLOCK_SUFFIX, idb->getModule());
     561        mCurrentMethod->setCallingConv(CallingConv::C);
     562        mCurrentMethod->setDoesNotThrow();
                // NOTE(review): index 1 presumably refers to the `self` argument under this LLVM
                // version's attribute indexing - confirm.
     563        mCurrentMethod->setDoesNotCapture(1);
     564        auto args = mCurrentMethod->arg_begin();
     565        args->setName("self");
                // From here on the kernel instance and remainingItems refer to the new function's arguments.
     566        setInstance(&*args);
     567        remainingItems = &*(++args);
     568        remainingItems->setName("remainingItems");
     569        idb->SetInsertPoint(idb->CreateBasicBlock("entry"));
     570    }
     571
     572    generateFinalBlockMethod(idb, remainingItems); // may be implemented by the BlockOrientedKernel subtype
     573
     574    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
     575
     576    if (!idb->supportsIndirectBr()) {
                // Close the outlined function, return to the caller's insertion point, call it with the
                // original values, and then restore the saved method and instance.
     577        idb->CreateRetVoid();
     578        idb->restoreIP(ip);
     579        idb->CreateCall(mCurrentMethod, {self, remainingItemCount});
     580        mCurrentMethod = cp;
     581        setInstance(self);
     582    }
     583
     584}
     585
     586//  The default finalBlock method simply dispatches to the doBlock routine.
     //  The remainingItems argument is unnamed and ignored here; only `idb` is used,
     //  and it is passed straight through to CreateDoBlockMethodCall.
     587void BlockOrientedKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, Value * /* remainingItems */) {
     588    CreateDoBlockMethodCall(idb);
     589}
     590
     // Emits the transfer of control to the do-block logic at the current insertion point.
     //
     // With indirect-branch support: creates a "resume" block, registers it as a destination of
     // mStrideLoopBranch, adds its block address to mStrideLoopTarget as the incoming value from
     // the current block, branches to mStrideLoopBody, and leaves the builder positioned in the
     // resume block.
     // Without it: emits a plain call to mDoBlockMethod, passing the current kernel instance.
     591void BlockOrientedKernel::CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb) {
     592    if (idb->supportsIndirectBr()) {
     593        BasicBlock * bb = idb->CreateBasicBlock("resume");
                // NOTE(review): presumably mStrideLoopBranch is the indirect branch that ends the stride
                // loop body, so control comes back to `bb` afterwards - confirm against the do-segment generator.
     594        mStrideLoopBranch->addDestination(bb);
     595        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), idb->GetInsertBlock());
     596        idb->CreateBr(mStrideLoopBody);
                // Place the resume block directly after the block that branches away, then continue emitting there.
     597        bb->moveAfter(idb->GetInsertBlock());
     598        idb->SetInsertPoint(bb);
     599    } else {
     600        idb->CreateCall(mDoBlockMethod, getInstance());
     601    }
     602}
     603
     604void MultiBlockKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) {
     605
     606    // First prepare the multi-block method that will be used.
     607    KernelBuilder * const iBuilder = kb.get();
     608
     609    std::vector<Type *> multiBlockParmTypes;
     610    multiBlockParmTypes.push_back(mKernelStateType->getPointerTo());
     611    for (auto buffer : mStreamSetInputBuffers) {
     612        multiBlockParmTypes.push_back(buffer->getPointerType());
     613    }
     614    for (auto buffer : mStreamSetOutputBuffers) {
     615        multiBlockParmTypes.push_back(buffer->getPointerType());
     616    }
     617    FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), multiBlockParmTypes, false);
     618    Function * multiBlockFunction = Function::Create(type, GlobalValue::InternalLinkage, getName() + MULTI_BLOCK_SUFFIX, iBuilder->getModule());
     619    multiBlockFunction->setCallingConv(CallingConv::C);
     620    multiBlockFunction->setDoesNotThrow();
     621    auto args = multiBlockFunction->arg_begin();
     622    args->setName("self");
     623    for (auto binding : mStreamSetInputs) {
     624        (++args)->setName(binding.name + "BufPtr");
     625    }
     626    for (auto binding : mStreamSetOutputs) {
     627        (args++)->setName(binding.name + "BufPtr");
     628    }
     629
     630    // Now use the generateMultiBlockLogic method of the MultiBlockKernelBuilder subtype to
     631    // provide the required multi-block kernel logic.
     632    auto ip = iBuilder->saveIP();
     633    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "multiBlockEntry", multiBlockFunction, 0));
     634
     635    generateMultiBlockLogic();
     636
     637    iBuilder->CreateRetVoid();
     638    iBuilder->restoreIP(ip);
     639
     640    // Now proceed with creation of the doSegment method.
     641
     642    BasicBlock * const entry = iBuilder->GetInsertBlock();
     643    BasicBlock * const doSegmentOuterLoop = iBuilder->CreateBasicBlock(getName() + "_doSegmentOuterLoop");
     644    BasicBlock * const doMultiBlockCall = iBuilder->CreateBasicBlock(getName() + "_doMultiBlockCall");
     645    BasicBlock * const finalBlockCheck = iBuilder->CreateBasicBlock(getName() + "_finalBlockCheck");
     646    BasicBlock * const doTempBufferBlock = iBuilder->CreateBasicBlock(getName() + "_doTempBufferBlock");
     647    BasicBlock * const segmentDone = iBuilder->CreateBasicBlock(getName() + "_segmentDone");
     648
     649    Value * blockBaseMask = iBuilder->CreateNot(iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
     650
     651    //
     652    //  A. Temporary Buffer Area Determination
     653    //
     654    // For final block processing and for processing near the end of physical buffer
     655    // boundaries, we need to allocate temporary space for processing a full block of input.
     656    // Compute the size requirements to store stream set data at the declared processing
     657    // rates in reference to one block of the principal input stream.
     658    //
     659
     660    unsigned bitBlockWidth = iBuilder->getBitBlockWidth();
     661    std::vector<Type *> tempBuffers;
     662    std::vector<unsigned> itemsPerPrincipalBlock;
     663    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     664        auto & rate = mStreamSetInputs[i].rate;
     665        std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
     666        if (refSet.empty()) {
     667            itemsPerPrincipalBlock.push_back(rate.calculateRatio(bitBlockWidth));
     668        }
     669        else {
     670            Port port; unsigned ssIdx;
     671            std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
     672            assert (port == Port::Input && ssIdx < i);
     673            itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
     674        }
     675        unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth;
     676        if (blocks > 1) {
     677            tempBuffers.push_back(ArrayType::get(mStreamSetInputBuffers[i]->getType(), blocks));
     678        }
     679        else {
     680            tempBuffers.push_back(mStreamSetInputBuffers[i]->getType());
     681        }
     682    }
     683    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     684        auto & rate = mStreamSetOutputs[i].rate;
     685        std::string refSet = mStreamSetOutputs[i].rate.referenceStreamSet();
     686        if (refSet.empty()) {
     687            itemsPerPrincipalBlock.push_back(rate.calculateRatio(bitBlockWidth));
     688        }
     689        else {
     690            Port port; unsigned ssIdx;
     691            std::tie(port, ssIdx) = getStreamPort(mStreamSetOutputs[i].name);
     692            if (port == Port::Output) ssIdx += mStreamSetInputs.size();
     693            itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
     694        }
     695        unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth;
     696        if (blocks > 1) {
     697            tempBuffers.push_back(ArrayType::get(mStreamSetOutputBuffers[i]->getType(), blocks));
     698        }
     699        else {
     700            tempBuffers.push_back(mStreamSetOutputBuffers[i]->getType());
     701        }
     702    }
     703    Type * tempParameterStructType = StructType::create(iBuilder->getContext(), tempBuffers);
     704    Value * tempParameterArea = iBuilder->CreateCacheAlignedAlloca(tempParameterStructType);
     705
     706    ConstantInt * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
     707    Value * availablePos = mAvailableItemCount[0];
     708    Value * itemsAvail = availablePos;
     709    //  Make sure that corresponding data is available depending on processing rate
     710    //  for all input stream sets.
     711    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
     712        Value * a = mAvailableItemCount[i];
     713        auto & rate = mStreamSetInputs[i].rate;
     714        assert (((rate.referenceStreamSet().empty()) || (rate.referenceStreamSet() == mStreamSetInputs[0].name)) && "Multiblock kernel input rate not with respect to principal stream.");
     715        Value * maxItems = rate.CreateMaxReferenceItemsCalculation(iBuilder, a);
     716        itemsAvail = iBuilder->CreateSelect(iBuilder->CreateICmpULT(itemsAvail, maxItems), itemsAvail, maxItems);
     717    }
     718
     719    Value * processed = iBuilder->getProcessedItemCount(mStreamSetInputs[0].name);
     720    Value * itemsToDo = iBuilder->CreateSub(itemsAvail, processed);
     721    Value * fullBlocksToDo = iBuilder->CreateUDiv(itemsToDo, blockSize);
     722    Value * excessItems = iBuilder->CreateURem(itemsToDo, blockSize);
     723
     724    //  Now we iteratively process these blocks using the doMultiBlock method.
     725    //  In each iteration, we process the maximum number of linearly accessible
     726    //  blocks on the principal input, reduced to ensure that the corresponding
     727    //  data is linearly available at the specified processing rates for the other inputs,
     728    //  and that each of the output buffers has sufficient linearly available space
     729    //  (using overflow areas, if necessary) for the maximum output that can be
     730    //  produced.
     731
     732    //iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(fullBlocksToDo, iBuilder->getSize(0)), doSegmentOuterLoop, finalBlockCheck);
     733    iBuilder->CreateBr(doSegmentOuterLoop);
     734
     735    iBuilder->SetInsertPoint(doSegmentOuterLoop);
     736    PHINode * const blocksRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "blocksRemaining");
     737    blocksRemaining->addIncoming(fullBlocksToDo, entry);
     738
     739
     740    // For each input buffer, determine the processedItemCount, the block pointer for the
     741    // buffer block containing the next item, and the number of linearly available items.
     742    //
     743    std::vector<Value *> processedItemCount;
     744    std::vector<Value *> inputBlockPtr;
     745    std::vector<Value *> producedItemCount;
     746    std::vector<Value *> outputBlockPtr;
     747
     748    //  Calculate linearly available blocks for all input stream sets.
     749    Value * linearlyAvailBlocks = nullptr;
     750    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     751        Value * p = iBuilder->getProcessedItemCount(mStreamSetInputs[i].name);
     752        Value * blkNo = iBuilder->CreateUDiv(p, blockSize);
     753        Value * b = iBuilder->getInputStreamBlockPtr(mStreamSetInputs[i].name, iBuilder->getInt32(0));
     754        processedItemCount.push_back(p);
     755        inputBlockPtr.push_back(b);
     756        auto & rate = mStreamSetInputs[i].rate;
     757        Value * blocks = nullptr;
     758        if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator()) && (rate.referenceStreamSet() == "")) {
     759            blocks = mStreamSetInputBuffers[i]->getLinearlyAccessibleBlocks(iBuilder, blkNo);
     760        } else {
     761            Value * linearlyAvailItems = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(iBuilder, p);
     762            Value * items = rate.CreateMaxReferenceItemsCalculation(iBuilder, linearlyAvailItems);
     763            blocks = iBuilder->CreateUDiv(items, blockSize);
     764        }
     765        if (i == 0) {
     766            linearlyAvailBlocks = blocks;
     767        } else {
     768            linearlyAvailBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyAvailBlocks), blocks, linearlyAvailBlocks);
     769        }
     770    }
     771
     772    //  Now determine the linearly writeable blocks, based on available blocks reduced
     773    //  by limitations of output buffer space.
     774    Value * linearlyWritableBlocks = linearlyAvailBlocks;
     775
     776    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     777        Value * p = iBuilder->getProducedItemCount(mStreamSetOutputs[i].name);
     778        Value * blkNo = iBuilder->CreateUDiv(p, blockSize);
     779        Value * b = iBuilder->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, iBuilder->getInt32(0));
     780        producedItemCount.push_back(p);
     781        outputBlockPtr.push_back(b);
     782        auto & rate = mStreamSetOutputs[i].rate;
     783        Value * blocks = nullptr;
     784        if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator())) {
     785            blocks = mStreamSetOutputBuffers[0]->getLinearlyWritableBlocks(iBuilder, blkNo);
     786        } else {
     787            Value * writableItems = mStreamSetOutputBuffers[0]->getLinearlyWritableItems(iBuilder, p);
     788            blocks = iBuilder->CreateUDiv(writableItems, blockSize);
     789        }
     790        linearlyWritableBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyWritableBlocks), blocks, linearlyWritableBlocks);
     791    }
     792    Value * haveBlocks = iBuilder->CreateICmpUGT(linearlyWritableBlocks, iBuilder->getSize(0));
     793
     794    iBuilder->CreateCondBr(haveBlocks, doMultiBlockCall, doTempBufferBlock);
     795
     796    //  At this point we have verified the availability of one or more blocks of input data and output buffer space for all stream sets.
     797    //  Now prepare the doMultiBlock call.
     798    iBuilder->SetInsertPoint(doMultiBlockCall);
     799
     800    Value * linearlyAvailItems = iBuilder->CreateMul(linearlyWritableBlocks, blockSize);
     801
     802    std::vector<Value *> doMultiBlockArgs;
     803    doMultiBlockArgs.push_back(linearlyAvailItems);
     804    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     805        doMultiBlockArgs.push_back(iBuilder->getRawInputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]));
     806    }
     807    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     808        doMultiBlockArgs.push_back(iBuilder->getRawOutputPointer(mStreamSetOutputs[i].name, iBuilder->getInt32(0), producedItemCount[i]));
     809    }
     810
     811    iBuilder->CreateCall(multiBlockFunction, doMultiBlockArgs);
     812
     813    // Do copybacks if necessary.
    819814    unsigned priorIdx = 0;
    820815    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    821816        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
    822         if (SwizzledCopybackBuffer * const cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    823             BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    824             BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
    825             Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
    826             Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
    827             Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
    828             Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
     817        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
     818            BasicBlock * copyBack = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
     819            BasicBlock * done = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     820            Value * newlyProduced = iBuilder->CreateSub(iBuilder->getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
     821            Value * priorBlock = iBuilder->CreateLShr(producedItemCount[i], log2BlockSize);
     822            Value * priorOffset = iBuilder->CreateAnd(producedItemCount[i], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
     823            Value * instance = iBuilder->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
    829824            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(iBuilder, priorBlock);
    830825            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
     
    838833            priorIdx++;
    839834        }
    840         if (CircularCopybackBuffer * const cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    841             BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    842             BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
    843             Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
    844             Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
    845             Value * accessible = cb->getLinearlyAccessibleItems(iBuilder, priorProduced[priorIdx]);
    846             Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
    847             iBuilder->CreateCondBr(wraparound, copyBack, done);
    848             iBuilder->SetInsertPoint(copyBack);
    849             Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
    850             cb->createCopyBack(iBuilder, instance, copyItems);
    851             iBuilder->CreateBr(done);
    852             iBuilder->SetInsertPoint(done);
    853             priorIdx++;
    854         }
    855     }
    856 
    857 
    858     /// Call the do block method if necessary then restore the current function state to the do segement method
    859     if (!useIndirectBr()) {
    860         iBuilder->CreateRetVoid();
    861         mDoBlockMethod = mCurrentMethod;
    862         iBuilder->restoreIP(ip);
    863         iBuilder->CreateCall(mCurrentMethod, self);
    864         setInstance(self);
    865         mCurrentMethod = cp;
    866     }
    867 
    868 }
    869 
    // Emits the final-block logic using the member builder `iBuilder`.
    //
    // When useIndirectBr() is false, the logic is generated into a separate internal function
    // named getName() + FINAL_BLOCK_SUFFIX taking (self, remainingItems), and a call to it is
    // emitted at the insertion point that was active on entry. Otherwise the logic is generated
    // directly at the current insertion point.
    //
    // remainingItems - item count forwarded to generateFinalBlockMethod; inside the outlined
    //                  function it is rebound to that function's second argument.
    870 inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
    871 
            // Snapshot everything that the outlined path overwrites so it can be restored below.
    872     Value * const self = getInstance();
    873     Function * const cp = mCurrentMethod;
    874     Value * const remainingItemCount = remainingItems;
    875     auto ip = iBuilder->saveIP();
    876 
    877     if (!useIndirectBr()) {
                // Signature of the outlined function: void (typeof(self), size_t).
    878         FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType(), iBuilder->getSizeTy()}, false);
    879         mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
    880         mCurrentMethod->setCallingConv(CallingConv::C);
    881         mCurrentMethod->setDoesNotThrow();
    882         mCurrentMethod->setDoesNotCapture(1);
    883         auto args = mCurrentMethod->arg_begin();
    884         args->setName("self");
                // From here on the kernel instance and remainingItems refer to the new function's arguments.
    885         setInstance(&*args);
    886         remainingItems = &*(++args);
    887         remainingItems->setName("remainingItems");
    888         iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
    889     }
    890 
    891     generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
    892 
    893     RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
    894 
    895     if (!useIndirectBr()) {
                // Close the outlined function, return to the caller's insertion point, call it with the
                // original values, and then restore the saved method and instance.
    896         iBuilder->CreateRetVoid();       
    897         iBuilder->restoreIP(ip);
    898         iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
    899         mCurrentMethod = cp;
    900         setInstance(self);
    901     }
    902 
    903 }
    904 
    905 //  The default finalBlock method simply dispatches to the doBlock routine.
    //  The remainingItems argument is unnamed and ignored here.
    906 void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
    907     CreateDoBlockMethodCall();
    908 }
    909 
    // Returns whatever the member builder reports from supportsIndirectBr().
    910 bool BlockOrientedKernel::useIndirectBr() const {
    911     return iBuilder->supportsIndirectBr();
    912 }
    913 
    // Emits the transfer of control to the do-block logic at the current insertion point.
    //
    // When useIndirectBr() is true: creates a "resume" block, registers it as a destination of
    // mStrideLoopBranch, adds its block address to mStrideLoopTarget as the incoming value from
    // the current block, branches to mStrideLoopBody, and leaves the builder positioned in the
    // resume block.
    // Otherwise: emits a plain call to mDoBlockMethod, passing the current kernel instance.
    914 void BlockOrientedKernel::CreateDoBlockMethodCall() {
    915     if (useIndirectBr()) {
    916         BasicBlock * bb = CreateBasicBlock("resume");
    917         mStrideLoopBranch->addDestination(bb);
    918         mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
    919         iBuilder->CreateBr(mStrideLoopBody);
                // Place the resume block directly after the block that branches away, then continue emitting there.
    920         bb->moveAfter(iBuilder->GetInsertBlock());
    921         iBuilder->SetInsertPoint(bb);
    922     } else {
    923         iBuilder->CreateCall(mDoBlockMethod, getInstance());
    924     }
    925 }
    926 
    // Emits a call to the function returned by getTerminateFunction() for the builder's module,
    // passing the current kernel instance, and stores the call's result in mOutputScalarResult.
    // Asserts that iBuilder is non-null.
    927 void Kernel::finalizeInstance() {
    928     assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
    929     mOutputScalarResult = iBuilder->CreateCall(getTerminateFunction(iBuilder->getModule()), { getInstance() });
    930 }
    931 
    // Looks up `name` in mStreamMap and returns the StreamPort stored for it.
    // If the name is not present, reports a fatal error that names this kernel and the
    // missing stream set.
    932 Kernel::StreamPort Kernel::getStreamPort(const std::string & name) const {
    933     const auto f = mStreamMap.find(name);
    934     if (LLVM_UNLIKELY(f == mStreamMap.end())) {
    935         report_fatal_error(getName() + " does not contain stream set " + name);
    936     }
    937     return f->second;
    938 }
    939 
    940    
    941 void MultiBlockKernel::generateDoSegmentMethod() {
    942    
    943     // First prepare the multi-block method that will be used.
    944    
    945     std::vector<Type *> multiBlockParmTypes;
    946     multiBlockParmTypes.push_back(mKernelStateType->getPointerTo());
    947     for (auto buffer : mStreamSetInputBuffers) {
    948         multiBlockParmTypes.push_back(buffer->getPointerType());
    949     }
    950     for (auto buffer : mStreamSetOutputBuffers) {
    951         multiBlockParmTypes.push_back(buffer->getPointerType());
    952     }
    953     FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), multiBlockParmTypes, false);
    954     Function * multiBlockFunction = Function::Create(type, GlobalValue::InternalLinkage, getName() + MULTI_BLOCK_SUFFIX, iBuilder->getModule());
    955     multiBlockFunction->setCallingConv(CallingConv::C);
    956     multiBlockFunction->setDoesNotThrow();
    957     auto args = multiBlockFunction->arg_begin();
    958     args->setName("self");
    959     for (auto binding : mStreamSetInputs) {
    960         (++args)->setName(binding.name + "BufPtr");
    961     }
    962     for (auto binding : mStreamSetOutputs) {
    963         (args++)->setName(binding.name + "BufPtr");
    964     }
    965    
    966     // Now use the generateMultiBlockLogic method of the MultiBlockKernelBuilder subtype to
    967     // provide the required multi-block kernel logic.
    968     auto ip = iBuilder->saveIP();
    969     iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "multiBlockEntry", multiBlockFunction, 0));
    970     generateMultiBlockLogic();
    971     iBuilder->CreateRetVoid();
    972     iBuilder->restoreIP(ip);
    973    
    974     // Now proceed with creation of the doSegment method.
    975    
    976     BasicBlock * const entry = iBuilder->GetInsertBlock();
    977     BasicBlock * const doSegmentOuterLoop = CreateBasicBlock(getName() + "_doSegmentOuterLoop");
    978     BasicBlock * const doMultiBlockCall = CreateBasicBlock(getName() + "_doMultiBlockCall");
    979     BasicBlock * const finalBlockCheck = CreateBasicBlock(getName() + "_finalBlockCheck");
    980     BasicBlock * const doTempBufferBlock = CreateBasicBlock(getName() + "_doTempBufferBlock");
    981     BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
    982    
    983     Value * blockBaseMask = iBuilder->CreateNot(iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
    984    
    985     //
    986     //  A. Temporary Buffer Area Determination
    987     //
    988     // For final block processing and for processing near the end of physical buffer
    989     // boundaries, we need to allocate temporary space for processing a full block of input.
    990     // Compute the size requirements to store stream set data at the declared processing
    991     // rates in reference to one block of the principal input stream. 
    992     //
    993 
    994     unsigned bitBlockWidth = iBuilder->getBitBlockWidth();
    995     std::vector<Type *> tempBuffers;
    996     std::vector<unsigned> itemsPerPrincipalBlock;
    997     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    998         auto & rate = mStreamSetInputs[i].rate;
    999         std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
    1000         if (refSet.empty()) {
    1001             itemsPerPrincipalBlock.push_back(rate.calculateRatio(bitBlockWidth));
    1002         }
    1003         else {
    1004             Port port; unsigned ssIdx;
    1005             std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
    1006             assert (port == Port::Input && ssIdx < i);
    1007             itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
    1008         }
    1009         unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth;
    1010         if (blocks > 1) {
    1011             tempBuffers.push_back(ArrayType::get(mStreamSetInputBuffers[i]->getType(), blocks));
    1012         }
    1013         else {
    1014             tempBuffers.push_back(mStreamSetInputBuffers[i]->getType());
    1015         }
    1016     }
    1017     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    1018         auto & rate = mStreamSetOutputs[i].rate;
    1019         std::string refSet = mStreamSetOutputs[i].rate.referenceStreamSet();
    1020         if (refSet.empty()) {
    1021             itemsPerPrincipalBlock.push_back(rate.calculateRatio(bitBlockWidth));
    1022         }
    1023         else {
    1024             Port port; unsigned ssIdx;
    1025             std::tie(port, ssIdx) = getStreamPort(mStreamSetOutputs[i].name);
    1026             if (port == Port::Output) ssIdx += mStreamSetInputs.size();
    1027             itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
    1028         }
    1029         unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth;
    1030         if (blocks > 1) {
    1031             tempBuffers.push_back(ArrayType::get(mStreamSetOutputBuffers[i]->getType(), blocks));
    1032         }
    1033         else {
    1034             tempBuffers.push_back(mStreamSetOutputBuffers[i]->getType());
    1035         }
    1036     }
    1037     Type * tempParameterStructType = StructType::create(iBuilder->getContext(), tempBuffers);
    1038     Value * tempParameterArea = iBuilder->CreateCacheAlignedAlloca(tempParameterStructType);
    1039    
    1040     ConstantInt * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
    1041     Value * availablePos = mAvailableItemCount[0];
    1042     Value * itemsAvail = availablePos;
    1043     //  Make sure that corresponding data is available depending on processing rate
    1044     //  for all input stream sets.
    1045     for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
    1046         Value * a = mAvailableItemCount[i];
    1047         auto & rate = mStreamSetInputs[i].rate;
    1048         assert (((rate.referenceStreamSet() == "") || (rate.referenceStreamSet() == mStreamSetInputs[0].name)) && "Multiblock kernel input rate not with respect to principal stream.");
    1049         Value * maxItems = rate.CreateMaxReferenceItemsCalculation(iBuilder, a);
    1050         itemsAvail = iBuilder->CreateSelect(iBuilder->CreateICmpULT(itemsAvail, maxItems), itemsAvail, maxItems);
    1051     }
    1052    
    1053     Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
    1054     Value * itemsToDo = iBuilder->CreateSub(itemsAvail, processed);
    1055     Value * fullBlocksToDo = iBuilder->CreateUDiv(itemsToDo, blockSize);
    1056     Value * excessItems = iBuilder->CreateURem(itemsToDo, blockSize);
    1057    
    1058     //  Now we iteratively process these blocks using the doMultiBlock method. 
    1059     //  In each iteration, we process the maximum number of linearly accessible
    1060     //  blocks on the principal input, reduced to ensure that the corresponding
    1061     //  data is linearly available at the specified processing rates for the other inputs,
    1062     //  and that each of the output buffers has sufficient linearly available space
    1063     //  (using overflow areas, if necessary) for the maximum output that can be
    1064     //  produced.
    1065    
    1066     //iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(fullBlocksToDo, iBuilder->getSize(0)), doSegmentOuterLoop, finalBlockCheck);
    1067     iBuilder->CreateBr(doSegmentOuterLoop);
    1068    
    1069     iBuilder->SetInsertPoint(doSegmentOuterLoop);
    1070     PHINode * const blocksRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "blocksRemaining");
    1071     blocksRemaining->addIncoming(fullBlocksToDo, entry);
    1072    
    1073    
    1074     // For each input buffer, determine the processedItemCount, the block pointer for the
    1075     // buffer block containing the next item, and the number of linearly available items.
    1076     //
    1077     std::vector<Value *> processedItemCount;
    1078     std::vector<Value *> inputBlockPtr;
    1079     std::vector<Value *> producedItemCount;
    1080     std::vector<Value *> outputBlockPtr;
    1081    
    1082     //  Calculate linearly available blocks for all input stream sets.
    1083     Value * linearlyAvailBlocks = nullptr;
    1084     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    1085         Value * p = getProcessedItemCount(mStreamSetInputs[i].name);
    1086         Value * blkNo = iBuilder->CreateUDiv(p, blockSize);
    1087         Value * b = getInputStreamBlockPtr(mStreamSetInputs[i].name, iBuilder->getInt32(0));
    1088         processedItemCount.push_back(p);
    1089         inputBlockPtr.push_back(b);
    1090         auto & rate = mStreamSetInputs[i].rate;
    1091         Value * blocks = nullptr;
    1092         if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator()) && (rate.referenceStreamSet() == "")) {
    1093             blocks = mStreamSetInputBuffers[i]->getLinearlyAccessibleBlocks(iBuilder, blkNo);
    1094         }
    1095         else {
    1096             Value * linearlyAvailItems = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(iBuilder, p);
    1097             Value * items = rate.CreateMaxReferenceItemsCalculation(iBuilder, linearlyAvailItems);
    1098             blocks = iBuilder->CreateUDiv(items, blockSize);
    1099         }
    1100         if (i == 0) {
    1101             linearlyAvailBlocks = blocks;
    1102         }
    1103         else {
    1104             linearlyAvailBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyAvailBlocks), blocks, linearlyAvailBlocks);
    1105         }
    1106     }
    1107    
    1108     //  Now determine the linearly writeable blocks, based on available blocks reduced
    1109     //  by limitations of output buffer space.
    1110     Value * linearlyWritableBlocks = linearlyAvailBlocks;
    1111    
    1112     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    1113         Value * p = getProducedItemCount(mStreamSetOutputs[i].name);
    1114         Value * blkNo = iBuilder->CreateUDiv(p, blockSize);
    1115         Value * b = getOutputStreamBlockPtr(mStreamSetOutputs[i].name, iBuilder->getInt32(0));
    1116         producedItemCount.push_back(p);
    1117         outputBlockPtr.push_back(b);
    1118         auto & rate = mStreamSetOutputs[i].rate;
    1119         Value * blocks = nullptr;
    1120         if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator())) {
    1121             blocks = mStreamSetOutputBuffers[0]->getLinearlyWritableBlocks(iBuilder, blkNo);
    1122         }
    1123         else {
    1124             Value * writableItems = mStreamSetOutputBuffers[0]->getLinearlyWritableItems(iBuilder, p);
    1125             blocks = iBuilder->CreateUDiv(writableItems, blockSize);
    1126         }
    1127         linearlyWritableBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyWritableBlocks), blocks, linearlyWritableBlocks);
    1128     }
    1129     Value * haveBlocks = iBuilder->CreateICmpUGT(linearlyWritableBlocks, iBuilder->getSize(0));
    1130    
    1131     iBuilder->CreateCondBr(haveBlocks, doMultiBlockCall, doTempBufferBlock);
    1132    
    1133     //  At this point we have verified the availability of one or more blocks of input data and output buffer space for all stream sets.
    1134     //  Now prepare the doMultiBlock call.
    1135     iBuilder->SetInsertPoint(doMultiBlockCall);
    1136    
    1137     Value * linearlyAvailItems = iBuilder->CreateMul(linearlyWritableBlocks, blockSize);
    1138    
    1139     std::vector<Value *> doMultiBlockArgs;
    1140     doMultiBlockArgs.push_back(linearlyAvailItems);
    1141     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    1142         doMultiBlockArgs.push_back(getRawInputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]));
    1143     }
    1144     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    1145         doMultiBlockArgs.push_back(getRawOutputPointer(mStreamSetOutputs[i].name, iBuilder->getInt32(0), producedItemCount[i]));
    1146     }
    1147        
    1148     iBuilder->CreateCall(multiBlockFunction, doMultiBlockArgs);
    1149    
    1150     // Do copybacks if necessary.
    1151     unsigned priorIdx = 0;
    1152     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    1153         Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
    1154         if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    1155             BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    1156             BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
    1157             Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
    1158             Value * priorBlock = iBuilder->CreateLShr(producedItemCount[i], log2BlockSize);
    1159             Value * priorOffset = iBuilder->CreateAnd(producedItemCount[i], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
    1160             Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
    1161             Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(iBuilder, priorBlock);
    1162             Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
    1163             Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
    1164             iBuilder->CreateCondBr(wraparound, copyBack, done);
    1165             iBuilder->SetInsertPoint(copyBack);
    1166             Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
    1167             cb->createCopyBack(iBuilder, instance, copyItems);
    1168             iBuilder->CreateBr(done);
    1169             iBuilder->SetInsertPoint(done);
    1170             priorIdx++;
    1171         }
    1172835        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
    1173             BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    1174             BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
    1175             Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
    1176             Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
     836            BasicBlock * copyBack = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
     837            BasicBlock * done = iBuilder->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     838            Value * instance = iBuilder->getStreamSetBufferPtr(mStreamSetOutputs[i].name);
     839            Value * newlyProduced = iBuilder->CreateSub(iBuilder->getProducedItemCount(mStreamSetOutputs[i].name), producedItemCount[i]);
    1177840            Value * accessible = cb->getLinearlyAccessibleItems(iBuilder, producedItemCount[i]);
    1178841            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
     
    1186849        }
    1187850    }
    1188     setProcessedItemCount(mStreamSetInputs[0].name, iBuilder->CreateAdd(processed, linearlyAvailItems));
     851    iBuilder->setProcessedItemCount(mStreamSetInputs[0].name, iBuilder->CreateAdd(processed, linearlyAvailItems));
    1189852    Value * reducedBlocksToDo = iBuilder->CreateSub(blocksRemaining, linearlyWritableBlocks);
    1190853    Value * fullBlocksRemain = iBuilder->CreateICmpUGT(reducedBlocksToDo, iBuilder->getSize(0));
     
    1192855    blocksRemaining->addIncoming(reducedBlocksToDo, multiBlockFinal);
    1193856    iBuilder->CreateCondBr(fullBlocksRemain, doSegmentOuterLoop, finalBlockCheck);
    1194    
     857
    1195858    // All the full blocks of input have been processed.  If mIsFinal is true,
    1196859    // we should process the remaining partial block (i.e., excessItems as determined at entry).
    1197860    iBuilder->SetInsertPoint(finalBlockCheck);
    1198861    iBuilder->CreateCondBr(mIsFinal, doTempBufferBlock, segmentDone);
    1199    
    1200     // 
     862
     863    //
    1201864    // We use temporary buffers in 3 different cases that preclude full block processing.
    1202865    // (a) One or more input buffers do not have a sufficient number of input items linearly available.
     
    1210873    tempBlockItems->addIncoming(blockSize, doSegmentOuterLoop);
    1211874    tempBlockItems->addIncoming(excessItems, finalBlockCheck);
    1212    
     875
    1213876    // Will this be the final block processing?
    1214877    Value * doFinal = iBuilder->CreateICmpULT(tempBlockItems, blockSize);
    1215    
     878
    1216879    // Begin constructing the doMultiBlock args.
    1217880    std::vector<Value *> tempArgs;
    1218881    tempArgs.push_back(tempBlockItems);
    1219    
     882
    1220883    // Prepare the temporary buffer area.
    1221884    //
     
    1223886    Constant * const tempAreaSize = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(tempParameterStructType), iBuilder->getSizeTy(), false);
    1224887    iBuilder->CreateMemZero(tempParameterArea, tempAreaSize);
    1225    
     888
    1226889    // For each input and output buffer, copy over necessary data starting from the last
    1227890    // block boundary.
     
    1232895        Value * tempBufPtr = iBuilder->CreateGEP(tempParameterArea, iBuilder->getInt32(i));
    1233896        tempBufPtr = iBuilder->CreatePointerCast(tempBufPtr, mStreamSetInputBuffers[i]->getPointerType());
    1234        
     897
    1235898        auto & rate = mStreamSetInputs[i].rate;
    1236899        Value * blockItemPos = iBuilder->CreateAnd(processedItemCount[i], blockBaseMask);
    1237        
     900
    1238901        // The number of items to copy is determined by the processing rate requirements.
    1239902        if (i > 1) {
     
    1255918        mStreamSetInputBuffers[i]->createBlockAlignedCopy(iBuilder, tempBufPtr, inputBlockPtr[i], copyItems1);
    1256919        Value * nextBufPtr = iBuilder->CreateGEP(tempBufPtr, iBuilder->CreateUDiv(availFromBase, blockSize));
    1257         mStreamSetInputBuffers[i]->createBlockAlignedCopy(iBuilder, nextBufPtr, getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
    1258         Value * itemAddress = iBuilder->CreatePtrToInt(getRawOutputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]), iBuilder->getSizeTy());
     920        mStreamSetInputBuffers[i]->createBlockAlignedCopy(iBuilder, nextBufPtr, iBuilder->getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
     921        Value * itemAddress = iBuilder->CreatePtrToInt(iBuilder->getRawOutputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]), iBuilder->getSizeTy());
    1259922        Value * baseAddress = iBuilder->CreatePtrToInt(inputBlockPtr[i], iBuilder->getSizeTy());
    1260923        Value * tempAddress = iBuilder->CreateAdd(iBuilder->CreatePtrToInt(tempBufPtr, iBuilder->getSizeTy()), iBuilder->CreateSub(itemAddress, baseAddress));
     
    1268931        blockItemPos.push_back(iBuilder->CreateAnd(producedItemCount[i], blockBaseMask));
    1269932        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, tempBufPtr, outputBlockPtr[i], iBuilder->CreateSub(producedItemCount[i], blockItemPos[i]));
    1270         Value * itemAddress = iBuilder->CreatePtrToInt(getRawOutputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), producedItemCount[i]), iBuilder->getSizeTy());
     933        Value * itemAddress = iBuilder->CreatePtrToInt(iBuilder->getRawOutputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), producedItemCount[i]), iBuilder->getSizeTy());
    1271934        Value * baseAddress = iBuilder->CreatePtrToInt(outputBlockPtr[i], iBuilder->getSizeTy());
    1272935        Value * tempAddress = iBuilder->CreateAdd(iBuilder->CreatePtrToInt(tempBufPtr, iBuilder->getSizeTy()), iBuilder->CreateSub(itemAddress, baseAddress));
     
    1277940
    1278941    // Copy back data to the actual output buffers.
    1279    
     942
    1280943    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {
    1281944        Value * tempBufPtr = iBuilder->CreateGEP(tempParameterArea, iBuilder->getInt32(mStreamSetInputs.size() + i));
    1282945        tempBufPtr = iBuilder->CreatePointerCast(tempBufPtr, mStreamSetOutputBuffers[i]->getPointerType());
    1283         Value * final_items = getProducedItemCount(mStreamSetOutputs[i].name);
     946        Value * final_items = iBuilder->getProducedItemCount(mStreamSetOutputs[i].name);
    1284947        Value * copyItems = iBuilder->CreateSub(final_items, blockItemPos[i]);
    1285948        Value * copyItems1 = mStreamSetOutputBuffers[i]->getLinearlyWritableItems(iBuilder, blockItemPos[i]); // must be a whole number of blocks.
     
    1287950        Value * copyItems2 = iBuilder->CreateSelect(iBuilder->CreateICmpULT(copyItems, copyItems), iBuilder->getSize(0), iBuilder->CreateSub(copyItems, copyItems1));
    1288951        tempBufPtr = iBuilder->CreateGEP(tempBufPtr, iBuilder->CreateUDiv(copyItems1, blockSize));
    1289         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, getStreamSetBufferPtr(mStreamSetOutputs[i].name), tempBufPtr, copyItems2);
    1290     }
    1291 
    1292     setProcessedItemCount(mStreamSetInputs[0].name, finalItemPos[0]);
     952        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, iBuilder->getStreamSetBufferPtr(mStreamSetOutputs[i].name), tempBufPtr, copyItems2);
     953    }
     954
     955    iBuilder->setProcessedItemCount(mStreamSetInputs[0].name, finalItemPos[0]);
    1293956
    1294957    //  We've dealt with the partial block processing and copied information back into the
     
    1298961    iBuilder->SetInsertPoint(segmentDone);
    1299962}
    1300                                                            
     963
     964void Kernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & idb) {
     965    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
     966    mOutputScalarResult = idb->CreateCall(getTerminateFunction(idb->getModule()), { getInstance() });
     967}
     968
     969Kernel::StreamPort Kernel::getStreamPort(const std::string & name) const {
     970    const auto f = mStreamMap.find(name);
     971    if (LLVM_UNLIKELY(f == mStreamMap.end())) {
     972        report_fatal_error(getName() + " does not contain stream set " + name);
     973    }
     974    return f->second;
     975}
     976
    1301977// CONSTRUCTOR
    1302978Kernel::Kernel(std::string && kernelName,
     
    13451021}
    13461022
    1347 // CONSTRUCTOR
    1348 MultiBlockKernel::MultiBlockKernel(std::string && kernelName,
    1349                                    std::vector<Binding> && stream_inputs,
    1350                                    std::vector<Binding> && stream_outputs,
    1351                                    std::vector<Binding> && scalar_parameters,
    1352                                    std::vector<Binding> && scalar_outputs,
    1353                                    std::vector<Binding> && internal_scalars)
    1354 : Kernel(std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
    1355    
    1356 }
    1357 }
     1023}
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5439 r5440  
    2525
    2626class Kernel : public KernelInterface {
     27    friend class KernelBuilder;
    2728protected:
    2829    using KernelMap = boost::container::flat_map<std::string, unsigned>;
     
    3233    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
    3334    using Kernels = std::vector<Kernel *>;
    34 
    35     friend class KernelBuilder;
    36     friend void ::generateSegmentParallelPipeline(const std::unique_ptr<kernel::KernelBuilder> &, const Kernels &);
    37     friend void ::generatePipelineLoop(const std::unique_ptr<kernel::KernelBuilder> &, const Kernels &);
    38     friend void ::generateParallelPipeline(const std::unique_ptr<kernel::KernelBuilder> &, const Kernels &);
    3935
    4036    static const std::string DO_BLOCK_SUFFIX;
     
    7975    bool isCachable() const override { return false; }
    8076
    81     std::string makeSignature() override;
     77    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
    8278
    8379    // Can the module ID itself serve as the unique signature?
     
    8783    //
    8884
    89     void createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
    90 
    91     void createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, llvm::Module * const kernelModule);
     85    void createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
     86
     87    void createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, llvm::Module * const kernelModule);
    9288
    9389    llvm::Module * getModule() const {
     
    9591    }
    9692
    97     // Generate the Kernel to the current module (iBuilder->getModule()).
    98     void generateKernel();
     93    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
    9994   
    100     llvm::Value * createInstance() final;
    101 
    102     void initializeInstance() final;
    103 
    104     void finalizeInstance() final;
    105 
    106     llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const final;
    107 
    108     void setProducedItemCount(const std::string & name, llvm::Value * value) const final;
    109 
    110     llvm::Value * getProcessedItemCount(const std::string & name) const final;
    111 
    112     void setProcessedItemCount(const std::string & name, llvm::Value * value) const final;
    113 
    114     llvm::Value * getConsumedItemCount(const std::string & name) const final;
    115 
    116     void setConsumedItemCount(const std::string & name, llvm::Value * value) const final;
    117 
    118     llvm::Value * getTerminationSignal() const final;
    119 
    120     void setTerminationSignal() const final;
    121 
    122     // Get the value of a scalar field for the current instance.
    123     llvm::Value * getScalarFieldPtr(llvm::Value * index) const;
    124 
    125     llvm::Value * getScalarFieldPtr(const std::string & fieldName) const;
    126 
    127     llvm::Value * getScalarField(const std::string & fieldName) const;
    128 
    129     // Set the value of a scalar field for the current instance.
    130     void setScalarField(const std::string & fieldName, llvm::Value * value) const;
    131 
    132     // Synchronization actions for executing a kernel for a particular logical segment.
    133     //
    134     // Before the segment is processed, acquireLogicalSegmentNo must be used to load
    135     // the segment number of the kernel state to ensure that the previous segment is
    136     // complete (by checking that the acquired segment number is equal to the desired segment
    137     // number).
    138     // After all segment processing actions for the kernel are complete, and any necessary
    139     // data has been extracted from the kernel for further pipeline processing, the
    140     // segment number must be incremented and stored using releaseLogicalSegmentNo.
    141     llvm::LoadInst * acquireLogicalSegmentNo() const;
    142 
    143     void releaseLogicalSegmentNo(llvm::Value * nextSegNo) const;
     95    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
     96
     97    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
     98
     99    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
    144100
    145101    bool hasNoTerminateAttribute() const {
     
    162118        return mStreamSetOutputBuffers[i];
    163119    }
    164 
    165     llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args) const;
    166 
    167     llvm::Value * getAccumulator(const std::string & accumName) const;
    168120
    169121    virtual ~Kernel() = 0;
     
    194146    }
    195147
     148    unsigned getScalarIndex(const std::string & name) const;
     149
    196150    void prepareStreamSetNameMap();
    197151
    198     void linkExternalMethods() override { }
    199 
    200     virtual void prepareKernel();
    201 
    202     virtual void generateInitializeMethod() { }
     152    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
     153
     154    virtual void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
     155
     156    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
    203157   
    204     virtual void generateDoSegmentMethod() = 0;
    205 
    206     virtual void generateFinalizeMethod() { }
     158    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
     159
     160    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
    207161
    208162    // Add an additional scalar field to the KernelState struct.
     
    212166    unsigned addUnnamedScalar(llvm::Type * type);
    213167
    214     // Run-time access of Kernel State and parameters of methods for
    215     // use in implementing kernels.
    216    
    217     // Get the index of a named scalar field within the kernel state struct.
    218     unsigned getScalarIndex(const std::string & name) const;
    219 
    220     llvm::Value * getInputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex) const;
    221 
    222     llvm::Value * loadInputStreamBlock(const std::string & name, llvm::Value * streamIndex) const;
    223    
    224     llvm::Value * getInputStreamPackPtr(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
    225    
    226     llvm::Value * loadInputStreamPack(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
    227    
    228     llvm::Value * getInputStreamSetCount(const std::string & name) const;
    229 
    230     llvm::Value * getOutputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex) const;
    231    
    232     void storeOutputStreamBlock(const std::string & name, llvm::Value * streamIndex, llvm::Value * toStore) const;
    233    
    234     llvm::Value * getOutputStreamPackPtr(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
    235    
    236     void storeOutputStreamPack(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex, llvm::Value * toStore) const;
    237 
    238     llvm::Value * getOutputStreamSetCount(const std::string & name) const;
    239 
    240     llvm::Value * getAdjustedInputStreamBlockPtr(llvm::Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const;
    241 
    242     llvm::Value * getRawInputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    243 
    244     llvm::Value * getRawOutputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    245 
    246     llvm::Value * getBaseAddress(const std::string & name) const;
    247 
    248     void setBaseAddress(const std::string & name, llvm::Value * addr) const;
    249 
    250     llvm::Value * getBufferedSize(const std::string & name) const;
    251 
    252     void setBufferedSize(const std::string & name, llvm::Value * size) const;
    253 
    254     void reserveBytes(const std::string & name, llvm::Value * requested) const;
    255 
    256     llvm::Value * getAvailableItemCount(const std::string & name) const;
    257 
    258     llvm::Value * getLinearlyAccessibleItems(const std::string & name, llvm::Value * fromPosition) const;
    259 
    260     llvm::BasicBlock * CreateWaitForConsumers() const;
    261 
    262     llvm::BasicBlock * CreateBasicBlock(std::string && name) const;
    263 
    264     llvm::Value * getStreamSetBufferPtr(const std::string & name) const;
    265 
    266168    llvm::Value * getIsFinal() const {
    267169        return mIsFinal;
    268170    }
    269171
    270     void callGenerateInitializeMethod();
    271 
    272     void callGenerateDoSegmentMethod();
    273 
    274     void callGenerateFinalizeMethod();
     172    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
     173
     174    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
     175
     176    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
    275177
    276178    StreamPort getStreamPort(const std::string & name) const;
     
    304206private:
    305207
    306     llvm::Value * getConsumerLock(const std::string & name) const;
    307 
    308     void setConsumerLock(const std::string & name, llvm::Value * value) const;
    309 
    310     llvm::Value * computeBlockIndex(const std::vector<Binding> & binding, const std::string & name, llvm::Value * itemCount) const;
     208    llvm::Value * getAvailableItemCount(const unsigned i) const {
     209        return mAvailableItemCount[i];
     210    }
    311211
    312212protected:
     
    344244protected:
    345245
    346     void CreateDoBlockMethodCall();
     246    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb);
    347247
    348248    // Each kernel builder subtype must provide its own logic for generating
    349249    // doBlock calls.
    350     virtual void generateDoBlockMethod() = 0;
     250    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) = 0;
    351251
    352252    // Each kernel builder subtype must also specify the logic for processing the
     
    357257    // not be overridden.
    358258
    359     virtual void generateFinalBlockMethod(llvm::Value * remainingItems);
    360 
    361     void generateDoSegmentMethod() override final;
     259    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
     260
     261    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
    362262
    363263    BlockOrientedKernel(std::string && kernelName,
     
    370270private:
    371271
    372     virtual bool useIndirectBr() const;
    373 
    374     void writeDoBlockMethod();
    375 
    376     void writeFinalBlockMethod(llvm::Value * remainingItems);
     272    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb);
     273
     274    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
    377275
    378276private:
     
    384282};
    385283
    386 /*   
     284/*
    387285The Multi-Block Kernel Builder
    388286------------------------------
     
    391289efficient kernels with possibly variable and/or nonaligned output, subject to
    392290exact or MaxRatio processing constraints.   The following restrictions apply.
    393    
     291
    394292#.  The input consists of one or more stream sets, the first of which is
    395     known as the principal input stream set. 
    396    
     293    known as the principal input stream set.
     294
    397295#.  If there is more than one input stream set, the additional stream sets must
    398296    have a processing rate defined with respect to the input stream set of one
     
    400298    declared without a processing rate attribute have the FixedRate(1) attribute
    401299    by default and therefore satisfy this constraint.
    402    
     300
    403301#.  All output stream sets must be declared with processing rate attributes
    404302    of one of the following types:
    405303    *  FixedRate, Add1, Roundup, or MaxRatio with respect to the principal input stream set.
    406304    *  FixedRate with respect to some other output stream set.
    407    
     305
    408306    When using the Multi-Block Kernel Builder to program a new type of kernel,
    409307    the programmer must implement the generateDoMultiBlockMethod for normal
    410308    multi-block processing according to the requirements below, as well as
    411309    providing for special final block processing, if necessary.
    412            
     310
    413311#.  The doMultiBlockMethod will be called with the following parameters:
    414312    * the number of items of the principal input stream to process (itemsToDo),
     
    438336    * for any input pointer p, a GEP instruction with a single int32 index i
    439337      will produce a pointer to the buffer position corresponding to the ith block of the
    440       principal input stream set. 
     338      principal input stream set.
    441339    * for any output stream set declared with a Fixed or Add1 processing rate with respect
    442340      to the principal input stream set, a GEP instruction with a single int32 index i
    443341      will produce a pointer to the buffer position corresponding to the ith block of the
    444342      principal input stream set.
    445                    
     343
    446344#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
    447345    processed and produced item counts are updated for all stream sets that have exact
    448346    processing rate attributes.   Programmers are responsible for updating the producedItemCount
    449347    of any stream set declared with a variable attribute (MaxRatio).
    450                            
     348
    451349#.  An important caveat is that buffer areas may change arbitrarily between
    452350    calls to the doMultiBlockMethod.   In no case should a kernel store a
     
    467365
    468366    // Each multi-block kernel subtype must provide its own logic for handling
    469     // doMultiBlock calls, subject to the requirements laid out above. 
     367    // doMultiBlock calls, subject to the requirements laid out above.
    470368    // The generateMultiBlockLogic must be written to generate this logic, given
    471369    // a created but empty function.  Upon entry to generateMultiBlockLogic,
    472370    // the builder insertion point will be set to the entry block; upon
    473371    // exit the RetVoid instruction will be added to complete the method.
    474     //
    475     virtual void generateMultiBlockLogic () = 0;
     372    //
     373    virtual void generateMultiBlockLogic() = 0;
     374
     375private:
    476376
    477377    // Given a kernel subtype with an appropriate interface, the generateDoSegment
    478378    // method of the multi-block kernel builder makes all the necessary arrangements
    479379    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
    480     void generateDoSegmentMethod() override final;
     380    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
     381
    481382};
    482    
    483    
     383
     384
    484385}
    485386#endif
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5439 r5440  
    1010namespace kernel {
    1111
    12 Value * KernelBuilder::getScalarFieldPtr(Value * const index) {
    13     return CreateGEP(mKernel->getInstance(), {getInt32(0), index});
    14 }
    15 
    16 Value * KernelBuilder::getScalarFieldPtr(const std::string & fieldName) {
    17     return getScalarFieldPtr(getInt32(mKernel->getScalarIndex(fieldName)));
     12Value * KernelBuilder::getScalarFieldPtr(llvm::Value * instance, Value * const index) {
     13    return CreateGEP(instance, {getInt32(0), index});
     14}
     15
     16Value * KernelBuilder::getScalarFieldPtr(llvm::Value * instance, const std::string & fieldName) {
     17    return getScalarFieldPtr(instance, getInt32(mKernel->getScalarIndex(fieldName)));
     18}
     19
     20llvm::Value * KernelBuilder::getScalarFieldPtr(llvm::Value * index) {
     21    return getScalarFieldPtr(mKernel->getInstance(), index);
     22}
     23
     24llvm::Value *KernelBuilder:: getScalarFieldPtr(const std::string & fieldName) {
     25    return getScalarFieldPtr(mKernel->getInstance(), fieldName);
    1826}
    1927
     
    4755        std::string principalField;
    4856        if (refSet.empty()) {
    49             const auto & principleSet = mKernel->getStreamOutput(0).name;
    5057            if (mKernel->getStreamInputs().empty()) {
    51                 principalField = principleSet + Kernel::PRODUCED_ITEM_COUNT_SUFFIX;
     58                principalField = mKernel->getStreamOutput(0).name + Kernel::PRODUCED_ITEM_COUNT_SUFFIX;
    5259            } else {
    53                 principalField = principleSet + Kernel::PROCESSED_ITEM_COUNT_SUFFIX;
     60                principalField = mKernel->getStreamInput(0).name + Kernel::PROCESSED_ITEM_COUNT_SUFFIX;
    5461            }
    5562        } else {
     
    8491
    8592Value * KernelBuilder::getAvailableItemCount(const std::string & name) {
    86 //    for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    87 //        if (mStreamSetInputs[i].name == name) {
    88 //            return mAvailableItemCount[i];
    89 //        }
    90 //    }
     93    const auto & inputs = mKernel->getStreamInputs();
     94    for (unsigned i = 0; i < inputs.size(); ++i) {
     95        if (inputs[i].name == name) {
     96            return mKernel->getAvailableItemCount(i);
     97        }
     98    }
    9199    return nullptr;
    92100}
     
    220228}
    221229
    222 BasicBlock * KernelBuilder::CreateWaitForConsumers() {
     230
     231CallInst * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) {
     232    Function * const doSegment = mKernel->getDoSegmentFunction(getModule());
     233    assert (doSegment->getArgumentList().size() == args.size());
     234    return CreateCall(doSegment, args);
     235}
     236
     237Value * KernelBuilder::getAccumulator(const std::string & accumName) {
     238    auto results = mKernel->mOutputScalarResult;
     239    if (LLVM_UNLIKELY(results == nullptr)) {
     240        report_fatal_error("Cannot get accumulator " + accumName + " until " + mKernel->getName() + " has terminated.");
     241    }
     242    const auto & outputs = mKernel->getScalarOutputs();
     243    const auto n = outputs.size();
     244    if (LLVM_UNLIKELY(n == 0)) {
     245        report_fatal_error(mKernel->getName() + " has no output scalars.");
     246    } else {
     247        for (unsigned i = 0; i < n; ++i) {
     248            const Binding & b = outputs[i];
     249            if (b.name == accumName) {
     250                if (n == 1) {
     251                    return results;
     252                } else {
     253                    return CreateExtractValue(results, {i});
     254                }
     255            }
     256        }
     257        report_fatal_error(mKernel->getName() + " has no output scalar named " + accumName);
     258    }
     259}
     260
     261BasicBlock * KernelBuilder::CreateConsumerWait() {
    223262    const auto consumers = mKernel->getStreamOutputs();
    224263    BasicBlock * const entry = GetInsertBlock();
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r5436 r5440  
    5454    // use in implementing kernels.
    5555
    56     // Get the index of a named scalar field within the kernel state struct.
    57     llvm::ConstantInt * getScalarIndex(const std::string & name);
    58 
    5956    llvm::Value * getInputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex);
    6057
     
    9592    llvm::Value * getLinearlyAccessibleItems(const std::string & name, llvm::Value * fromPosition);
    9693
    97     llvm::BasicBlock * CreateWaitForConsumers();
     94    llvm::BasicBlock * CreateConsumerWait();
    9895
    9996    llvm::Value * getStreamSetBufferPtr(const std::string & name);
     97
     98    llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args);
     99
     100    llvm::Value * getAccumulator(const std::string & accumName);
     101
     102    llvm::Value * getConsumerLock(const std::string & name);
     103
     104    void setConsumerLock(const std::string & name, llvm::Value * value);
    100105
    101106    Kernel * getKernel() const {
     
    114119    }
    115120
     121    llvm::Value * getScalarFieldPtr(llvm::Value * instance, llvm::Value * index);
     122
     123    llvm::Value * getScalarFieldPtr(llvm::Value * instance, const std::string & fieldName);
     124
    116125private:
    117 
    118     llvm::Value * getConsumerLock(const std::string & name);
    119 
    120     void setConsumerLock(const std::string & name, llvm::Value * value);
    121126
    122127    llvm::Value * computeBlockIndex(llvm::Value * itemCount);
  • icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp

    r5436 r5440  
    1111using namespace kernel;
    1212
    13 Value * getInputPtr(IDISA::IDISA_Builder * const iBuilder, Value * blockStartPtr, Value * offset) {
     13Value * getInputPtr(const std::unique_ptr<KernelBuilder> & iBuilder, Value * blockStartPtr, Value * offset) {
    1414    return iBuilder->CreateGEP(
    1515            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
     
    1818}
    1919
    20 Value * selectMin(IDISA::IDISA_Builder * const iBuilder, Value * a, Value * b) {
     20Value * selectMin(const std::unique_ptr<KernelBuilder> & iBuilder, Value * a, Value * b) {
    2121    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
    2222}
    2323
    24 void LZ4ByteStreamDecoderKernel::generateDoBlockMethod() {
     24void LZ4ByteStreamDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    2525    BasicBlock * entry_block = iBuilder->GetInsertBlock();
    26     BasicBlock * loopBody = CreateBasicBlock("bytestream_block_loop_body");
    27     BasicBlock * loopExit = CreateBasicBlock("bytestream_block_loop_exit");
     26    BasicBlock * loopBody = iBuilder->CreateBasicBlock("bytestream_block_loop_body");
     27    BasicBlock * loopExit = iBuilder->CreateBasicBlock("bytestream_block_loop_exit");
    2828
    2929    Value * bufferSize = iBuilder->getSize(mBufferSize);
     
    3131    Value * iterations = selectMin(iBuilder,
    3232            iBuilder->getSize(iBuilder->getBitBlockWidth()),
    33             iBuilder->CreateSub(getAvailableItemCount("literalIndexes"), getProcessedItemCount("literalIndexes")));
    34     Value * inputBufferBasePtr = getRawInputPointer("inputStream", iBuilder->getSize(0), iBuilder->getSize(0));
    35     Value * outputBufferBasePtr = getRawOutputPointer("outputStream", iBuilder->getSize(0), iBuilder->getSize(0));
     33            iBuilder->CreateSub(iBuilder->getAvailableItemCount("literalIndexes"), iBuilder->getProcessedItemCount("literalIndexes")));
     34    Value * inputBufferBasePtr = iBuilder->getRawInputPointer("inputStream", iBuilder->getSize(0), iBuilder->getSize(0));
     35    Value * outputBufferBasePtr = iBuilder->getRawOutputPointer("outputStream", iBuilder->getSize(0), iBuilder->getSize(0));
    3636    iBuilder->CreateBr(loopBody);
    3737
     
    4343    // Indexes extraction.
    4444    Value * literalStartPtr = getInputPtr(iBuilder,
    45             getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(0)), phiInputIndex);
     45            iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(0)), phiInputIndex);
    4646    Value * literalLengthPtr = getInputPtr(iBuilder,
    47             getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(1)), phiInputIndex);
     47            iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(1)), phiInputIndex);
    4848    Value * matchOffsetPtr = getInputPtr(iBuilder,
    49             getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(0)), phiInputIndex);
     49            iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(0)), phiInputIndex);
    5050    Value * matchLengthPtr = getInputPtr(iBuilder,
    51             getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(1)), phiInputIndex);
     51            iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(1)), phiInputIndex);
    5252    Value * literalStart = iBuilder->CreateZExt(iBuilder->CreateLoad(literalStartPtr), iBuilder->getSizeTy());
    5353    Value * literalLength = iBuilder->CreateZExt(iBuilder->CreateLoad(literalLengthPtr), iBuilder->getSizeTy());
     
    5656
    5757#if 0
    58     Value * processedItem = iBuilder->CreateAdd(getProcessedItemCount("literalIndexes"), phiInputIndex);
     58    Value * processedItem = iBuilder->CreateAdd(iBuilder->getProcessedItemCount("literalIndexes"), phiInputIndex);
    5959    iBuilder->CallPrintInt("ProccessedItem", processedItem);
    6060    iBuilder->CallPrintInt("LiteralStart", literalStart);
     
    6666    // =================================================
    6767    // Literals.
    68     Value * outputItems = getProducedItemCount("outputStream");
     68    Value * outputItems = iBuilder->getProducedItemCount("outputStream");
    6969    Value * bufferOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
    7070    Value * remainingBuffer = iBuilder->CreateSub(bufferSize, bufferOffset);
     
    9494            iBuilder->getSize(4)
    9595            );
    96     BasicBlock * cpyLoopCond = CreateBasicBlock("matchcopy_loop_cond");
    97     BasicBlock * cpyLoopBody = CreateBasicBlock("matchcopy_loop_body");
    98     BasicBlock * cpyLoopExit = CreateBasicBlock("matchcopy_loop_exit");
     96    BasicBlock * cpyLoopCond = iBuilder->CreateBasicBlock("matchcopy_loop_cond");
     97    BasicBlock * cpyLoopBody = iBuilder->CreateBasicBlock("matchcopy_loop_body");
     98    BasicBlock * cpyLoopExit = iBuilder->CreateBasicBlock("matchcopy_loop_exit");
    9999    iBuilder->CreateBr(cpyLoopCond);
    100100
     
    117117    iBuilder->CallPrintIntToStderr("dstOffset", phiDstOffset);
    118118#endif
    119     BasicBlock * reachingBufferEnd_then = CreateBasicBlock("matchcopy_reaching_buf_end_then");
    120     BasicBlock * reachingBufferEnd_else = CreateBasicBlock("matchcopy_reaching_buf_end_else");
     119    BasicBlock * reachingBufferEnd_then = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_then");
     120    BasicBlock * reachingBufferEnd_else = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_else");
    121121    Value * distSrcEnd = iBuilder->CreateSub(bufferSize, phiSrcOffset);
    122122    Value * distDstEnd = iBuilder->CreateSub(bufferSize, phiDstOffset);
     
    170170    iBuilder->SetInsertPoint(cpyLoopExit);
    171171    outputItems = iBuilder->CreateAdd(outputItems, matchLength);
    172     setProducedItemCount("outputStream", outputItems);
     172    iBuilder->setProducedItemCount("outputStream", outputItems);
    173173
    174174    Value * newInputIndex = iBuilder->CreateAdd(phiInputIndex, iBuilder->getSize(1));
     
    182182    iBuilder->SetInsertPoint(loopExit);
    183183#ifndef NDEBUG
    184     iBuilder->CallPrintInt("Decompressed bytes", getProducedItemCount("outputStream"));
     184    iBuilder->CallPrintInt("Decompressed bytes", iBuilder->getProducedItemCount("outputStream"));
    185185#endif
    186186}
  • icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.h

    r5436 r5440  
    1818    LZ4ByteStreamDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, size_t bufferSize);
    1919protected:
    20     void generateDoBlockMethod() override;
     20    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2121private:
    2222    size_t mBufferSize;
  • icGREP/icgrep-devel/icgrep/kernels/lz4_index_decoder.cpp

    r5436 r5440  
    2828namespace {
    2929
    30 Value * generateBitswap(IDISA::IDISA_Builder * const iBuilder, Value * v) {
     30Value * generateBitswap(const std::unique_ptr<KernelBuilder> & iBuilder, Value * v) {
    3131    Value * bswapFunc = Intrinsic::getDeclaration(iBuilder->getModule(),
    3232            Intrinsic::bswap, v->getType());
     
    3434}
    3535
    36 Value * selectMin(IDISA::IDISA_Builder * const iBuilder, Value * a, Value * b) {
     36Value * selectMin(const std::unique_ptr<KernelBuilder> & iBuilder, Value * a, Value * b) {
    3737    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
    3838}
    3939
    40 Value * createStackVar(IDISA::IDISA_Builder * const iBuilder, Type * type, StringRef name, Value * initializer = nullptr) {
     40Value * createStackVar(const std::unique_ptr<KernelBuilder> & iBuilder, Type * type, StringRef name, Value * initializer = nullptr) {
    4141    Value * var = iBuilder->CreateAlloca(type, nullptr, name);
    4242    if (initializer) {
     
    4848}
    4949
    50 void incStackVar(IDISA::IDISA_Builder * const iBuilder, Value * svar, Value * increment = nullptr) {
     50void incStackVar(const std::unique_ptr<KernelBuilder> & iBuilder, Value * svar, Value * increment = nullptr) {
    5151    Value * value = iBuilder->CreateLoad(svar);
    5252    if (increment) {
     
    5858}
    5959
    60 Value * getOutputPtr(IDISA::IDISA_Builder * const iBuilder, Value * blockStartPtr, Value * offset) {
     60Value * getOutputPtr(const std::unique_ptr<KernelBuilder> & iBuilder, Value * blockStartPtr, Value * offset) {
    6161    return iBuilder->CreateGEP(
    6262            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
     
    6767}       // anonymous namespace
    6868
    69 
    70 /**
    71  * In order to allow mem2reg to promote the stack variables, alloca's have
    72  * to be in the entry block of a function. Thus, we need to disable indirect
    73  * branching on this kernel to have a standalone DoMethod function.
    74  */
    75 bool LZ4IndexDecoderKernel::useIndirectBr() const {
    76     return false;
    77 }
    78 
    79 
    8069/**
    8170 * Get the offset within the current word.
    8271 */
    83 Value * LZ4IndexDecoderKernel::getWordOffset() {
     72Value * LZ4IndexDecoderKernel::getWordOffset(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    8473    Value * wordWidthMask = iBuilder->getInt32(wordWidth - 1);
    8574    return iBuilder->CreateAnd(
     
    9382 * Get the offset of the start of the current word.
    9483 */
    95 Value * LZ4IndexDecoderKernel::getWordStartOffset() {
     84Value * LZ4IndexDecoderKernel::getWordStartOffset(const std::unique_ptr<KernelBuilder> & iBuilder) {
    9685    Value * wordWidthMask = iBuilder->getInt32(wordWidth - 1);
    9786    return iBuilder->CreateAnd(
     
    10695 * If offset is not provided, load the current byte by default.
    10796 */
    108 Value * LZ4IndexDecoderKernel::loadRawByte(Value * offset = nullptr) {
     97Value * LZ4IndexDecoderKernel::loadRawByte(const std::unique_ptr<KernelBuilder> & iBuilder, Value * offset) {
    10998    Value * blockStartPtr = iBuilder->CreatePointerCast(
    110             getInputStreamBlockPtr("byteStream", iBuilder->getInt32(0)),
     99            iBuilder->getInputStreamBlockPtr("byteStream", iBuilder->getInt32(0)),
    111100            iBuilder->getInt8PtrTy()
    112101            );
     
    125114 * cleared  = ....111
    126115 */
    127 void LZ4IndexDecoderKernel::setExtenderUntilOffset() {
     116void LZ4IndexDecoderKernel::setExtenderUntilOffset(const std::unique_ptr<KernelBuilder> & iBuilder) {
    128117    // Little-endian, offset counts from LSB
    129118    // extender = extender ^ ~((1 << offset) -1)
    130119    Value * extender = iBuilder->CreateLoad(sExtender);
    131120    Value * wordOffset = iBuilder->CreateZExt(
    132             getWordOffset(),
     121            getWordOffset(iBuilder),
    133122            iBuilder->getSizeTy()
    134123            );
     
    146135 * Called when we potentially reach a new word.  Usually followed by setExtenderUntilOffset.
    147136 */
    148 void LZ4IndexDecoderKernel::loadCurrentExtender() {
     137void LZ4IndexDecoderKernel::loadCurrentExtender(const std::unique_ptr<KernelBuilder> & iBuilder) {
    149138    iBuilder->CreateStore(
    150139            iBuilder->CreateExtractElement(extenders,
     
    158147
    159148
    160 void LZ4IndexDecoderKernel::generateProduceOutput() {
    161     Value * producedItem = getProducedItemCount("literalIndexes");
     149void LZ4IndexDecoderKernel::generateProduceOutput(const std::unique_ptr<KernelBuilder> &iBuilder) {
     150    Value * producedItem = iBuilder->getProducedItemCount("literalIndexes");
    162151
    163152#ifndef NDEBUG
     
    165154    // LiteralStart is adjusted to be relative to the block start, so that
    166155    // the output can be compared against that of the reference implementation.
    167     //iBuilder->CallPrintInt("LiteralStart", getScalarField("LiteralStart"));
    168     iBuilder->CallPrintInt("LiteralStart", iBuilder->CreateSub(
    169                 getScalarField("LiteralStart"), getScalarField("LZ4BlockStart")));
    170     iBuilder->CallPrintInt("LiteralLength", getScalarField("LiteralLength"));
    171     iBuilder->CallPrintInt("MatchOffset", getScalarField("MatchOffset"));
    172     iBuilder->CallPrintInt("MatchLength", getScalarField("MatchLength"));
     156    Value * literalStart = iBuilder->CreateSub(iBuilder->getScalarField("LiteralStart"), iBuilder->getScalarField("LZ4BlockStart"));
     157    iBuilder->CallPrintInt("LiteralStart", literalStart);
     158    iBuilder->CallPrintInt("LiteralLength", iBuilder->getScalarField("LiteralLength"));
     159    iBuilder->CallPrintInt("MatchOffset", iBuilder->getScalarField("MatchOffset"));
     160    iBuilder->CallPrintInt("MatchLength", iBuilder->getScalarField("MatchLength"));
    173161#endif
    174162    printRTDebugMsg("--------------");
     
    179167            );  // producedItem % blockWidth (as blockWidth is always a power of 2)
    180168    Value * literalStartPtr = getOutputPtr(iBuilder,
    181             getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(0)), outputOffset);
     169            iBuilder->getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(0)), outputOffset);
    182170    Value * literalLengthPtr = getOutputPtr(iBuilder,
    183             getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(1)), outputOffset);
     171            iBuilder->getOutputStreamBlockPtr("literalIndexes", iBuilder->getInt32(1)), outputOffset);
    184172    Value * matchOffsetPtr = getOutputPtr(iBuilder,
    185             getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(0)), outputOffset);
     173            iBuilder->getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(0)), outputOffset);
    186174    Value * matchLengthPtr = getOutputPtr(iBuilder,
    187             getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(1)), outputOffset);
    188     iBuilder->CreateStore(getScalarField("LiteralStart"), literalStartPtr);
    189     iBuilder->CreateStore(getScalarField("LiteralLength"), literalLengthPtr);
    190     iBuilder->CreateStore(getScalarField("MatchOffset"), matchOffsetPtr);
    191     iBuilder->CreateStore(getScalarField("MatchLength"), matchLengthPtr);
    192     setProducedItemCount("literalIndexes", iBuilder->CreateAdd(producedItem, iBuilder->getSize(1)));
     175            iBuilder->getOutputStreamBlockPtr("matchIndexes", iBuilder->getInt32(1)), outputOffset);
     176    iBuilder->CreateStore(iBuilder->getScalarField("LiteralStart"), literalStartPtr);
     177    iBuilder->CreateStore(iBuilder->getScalarField("LiteralLength"), literalLengthPtr);
     178    iBuilder->CreateStore(iBuilder->getScalarField("MatchOffset"), matchOffsetPtr);
     179    iBuilder->CreateStore(iBuilder->getScalarField("MatchLength"), matchLengthPtr);
     180    iBuilder->setProducedItemCount("literalIndexes", iBuilder->CreateAdd(producedItem, iBuilder->getSize(1)));
    193181    // matchIndexes has a fixed ratio of 1:1 w.r.t. literalIndexes.
    194182}
    195183
    196184
    197 void LZ4IndexDecoderKernel::generateDoBlockMethod() {
     185void LZ4IndexDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    198186    BasicBlock * entry_block = iBuilder->GetInsertBlock();
    199     BasicBlock * exit_block = CreateBasicBlock("exit");
     187    BasicBlock * exit_block = iBuilder->CreateBasicBlock("exit");
    200188
    201189    // %entry
     
    203191    printRTDebugMsg("entry");
    204192    // Global positions in the byte stream.
    205     Value * blockNo = getScalarField("BlockNo");
     193    Value * blockNo = iBuilder->getScalarField("BlockNo");
    206194    blockStartPos = iBuilder->CreateMul(blockNo, iBuilder->getInt32(iBuilder->getBitBlockWidth()), "blockStartPos");
    207195    extenders = iBuilder->CreateBitCast(
    208             loadInputStreamBlock("extenders", iBuilder->getInt32(0)),
     196            iBuilder->loadInputStreamBlock("extenders", iBuilder->getInt32(0)),
    209197            VectorType::get(iBuilder->getSizeTy(), iBuilder->getBitBlockWidth() / wordWidth),
    210198            "extenders");
     
    212200    sOffset = createStackVar(iBuilder, iBuilder->getInt32Ty(), "offset");
    213201    // tempLength has different meanings in different states.
    214     sTempLength = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempLength", getScalarField("TempLength"));
    215     sTempCount = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempCount", getScalarField("TempCount"));
    216     sState = createStackVar(iBuilder, iBuilder->getInt8Ty(), "state", getScalarField("State"));
     202    sTempLength = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempLength", iBuilder->getScalarField("TempLength"));
     203    sTempCount = createStackVar(iBuilder, iBuilder->getInt32Ty(), "tempCount", iBuilder->getScalarField("TempCount"));
     204    sState = createStackVar(iBuilder, iBuilder->getInt8Ty(), "state", iBuilder->getScalarField("State"));
    217205    sExtender = createStackVar(iBuilder, iBuilder->getSizeTy(), "extender",
    218206            iBuilder->CreateExtractElement(extenders, iBuilder->getInt32(0)));
    219207
    220     BasicBlock * skippingBytes = CreateBasicBlock("skipping_bytes");
    221     BasicBlock * dispatch = CreateBasicBlock("dispatch");
     208    BasicBlock * skippingBytes = iBuilder->CreateBasicBlock("skipping_bytes");
     209    BasicBlock * dispatch = iBuilder->CreateBasicBlock("dispatch");
    222210
    223211    iBuilder->CreateCondBr(
    224             iBuilder->CreateICmpUGT(getScalarField("BytesToSkip"), iBuilder->getInt32(0)),
     212            iBuilder->CreateICmpUGT(iBuilder->getScalarField("BytesToSkip"), iBuilder->getInt32(0)),
    225213            skippingBytes, dispatch
    226214            );
    227215
    228216    // %skipping_bytes
    229     generateSkippingBytes(skippingBytes, exit_block);
     217    generateSkippingBytes(iBuilder, skippingBytes, exit_block);
    230218    // Insert point is at the end of skippingBytes.
    231219    iBuilder->CreateBr(dispatch);
     
    235223
    236224    // %at_block_checksum
    237     BasicBlock * atBlockChecksum = CreateBasicBlock("at_block_checksum");
    238     generateAtBlockChecksum(atBlockChecksum, skippingBytes);
     225    BasicBlock * atBlockChecksum = iBuilder->CreateBasicBlock("at_block_checksum");
     226    generateAtBlockChecksum(iBuilder, atBlockChecksum, skippingBytes);
    239227 
    240228    // %at_block_size
    241     BasicBlock * atBlockSize = CreateBasicBlock("at_block_size");
    242     generateAtBlockSize(atBlockSize, skippingBytes, exit_block);
     229    BasicBlock * atBlockSize = iBuilder->CreateBasicBlock("at_block_size");
     230    generateAtBlockSize(iBuilder, atBlockSize, skippingBytes, exit_block);
    243231
    244232    // %at_token
    245     BasicBlock * atToken = CreateBasicBlock("at_token");
    246     generateAtToken(atToken, exit_block);
     233    BasicBlock * atToken = iBuilder->CreateBasicBlock("at_token");
     234    generateAtToken(iBuilder, atToken, exit_block);
    247235
    248236    // %extending_literal_length
    249     BasicBlock * extendingLiteralLen = CreateBasicBlock("extending_literal_length");
    250     generateExtendingLiteralLen(extendingLiteralLen, exit_block);
     237    BasicBlock * extendingLiteralLen = iBuilder->CreateBasicBlock("extending_literal_length");
     238    generateExtendingLiteralLen(iBuilder, extendingLiteralLen, exit_block);
    251239
    252240    // %at_literals
    253     BasicBlock * atLiterals = CreateBasicBlock("at_literals");
    254     generateAtLiterals(atLiterals);
     241    BasicBlock * atLiterals = iBuilder->CreateBasicBlock("at_literals");
     242    generateAtLiterals(iBuilder, atLiterals);
    255243    iBuilder->CreateBr(skippingBytes);
    256244
     
    259247    // If the whole LZ4 block is done, process the (optional) checksum.
    260248    // Otherwise, go around to process the next sequence.
    261     BasicBlock * atOffset1 = CreateBasicBlock("at_first_offset");
     249    BasicBlock * atOffset1 = iBuilder->CreateBasicBlock("at_first_offset");
    262250    iBuilder->SetInsertPoint(atOffset1);
    263251    Value * nowGlobalPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
    264     BasicBlock * blockEnd_else = CreateBasicBlock("block_end_else");
     252    BasicBlock * blockEnd_else = iBuilder->CreateBasicBlock("block_end_else");
    265253    // Conditional branch inserted at the end of the last block.
    266254    iBuilder->CreateUnlikelyCondBr(
    267             iBuilder->CreateICmpEQ(nowGlobalPos, getScalarField("LZ4BlockEnd")),
     255            iBuilder->CreateICmpEQ(nowGlobalPos, iBuilder->getScalarField("LZ4BlockEnd")),
    268256            atBlockChecksum, blockEnd_else
    269257            );
    270     generateAtFirstOffset(blockEnd_else, exit_block);
     258    generateAtFirstOffset(iBuilder, blockEnd_else, exit_block);
    271259
    272260    // %at_second_offset
    273     BasicBlock * atOffset2 = CreateBasicBlock("at_second_offset");
    274     generateAtSecondOffset(atOffset2, exit_block);
     261    BasicBlock * atOffset2 = iBuilder->CreateBasicBlock("at_second_offset");
     262    generateAtSecondOffset(iBuilder, atOffset2, exit_block);
    275263
    276264    // %extending_match_length
    277     BasicBlock * extendingMatchLen = CreateBasicBlock("extending_match_length");
    278     generateExtendingMatchLen(extendingMatchLen, exit_block);
     265    BasicBlock * extendingMatchLen = iBuilder->CreateBasicBlock("extending_match_length");
     266    generateExtendingMatchLen(iBuilder, extendingMatchLen, exit_block);
    279267    iBuilder->CreateBr(atToken);
    280268
     
    301289    iBuilder->SetInsertPoint(exit_block);
    302290    printRTDebugMsg("exit");
    303     setScalarField("State", iBuilder->CreateLoad(sState));
    304     setScalarField("TempLength", iBuilder->CreateLoad(sTempLength));
    305     setScalarField("TempCount", iBuilder->CreateLoad(sTempCount));
    306     setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getInt32(1)));
     291    iBuilder->setScalarField("State", iBuilder->CreateLoad(sState));
     292    iBuilder->setScalarField("TempLength", iBuilder->CreateLoad(sTempLength));
     293    iBuilder->setScalarField("TempCount", iBuilder->CreateLoad(sTempCount));
     294    iBuilder->setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, iBuilder->getInt32(1)));
    307295    // When the kernel builder uses indirectbr, doBlock is not a separate function.
    308296    // Hence, we branch to a new basic block and fall through instead of returning.
    309     BasicBlock * end_block = CreateBasicBlock("end_of_block");
     297    BasicBlock * end_block = iBuilder->CreateBasicBlock("end_of_block");
    310298    iBuilder->CreateBr(end_block);
    311299    iBuilder->SetInsertPoint(end_block);
     
    313301
    314302
    315 void LZ4IndexDecoderKernel::generateBoundaryDetection(State state, BasicBlock * exit_block, bool updateExtenderWord=false) {
     303void LZ4IndexDecoderKernel::generateBoundaryDetection(const std::unique_ptr<KernelBuilder> & iBuilder, State state, BasicBlock * exit_block, bool updateExtenderWord) {
    316304    if (updateExtenderWord) {
    317         BasicBlock * wordBoundary_then = CreateBasicBlock("word_boundary_then-" + StateLabels.at(state));
    318         BasicBlock * blockBoundary_else = CreateBasicBlock("block_boundary_else-" + StateLabels.at(state));
    319         BasicBlock * wordBoundary_cont = CreateBasicBlock("word_boundary_cont-" + StateLabels.at(state));
     305        BasicBlock * wordBoundary_then = iBuilder->CreateBasicBlock("word_boundary_then-" + StateLabels.at(state));
     306        BasicBlock * blockBoundary_else = iBuilder->CreateBasicBlock("block_boundary_else-" + StateLabels.at(state));
     307        BasicBlock * wordBoundary_cont = iBuilder->CreateBasicBlock("word_boundary_cont-" + StateLabels.at(state));
    320308        iBuilder->CreateUnlikelyCondBr(
    321                 iBuilder->CreateICmpEQ(getWordOffset(), iBuilder->getInt32(0)),
     309                iBuilder->CreateICmpEQ(getWordOffset(iBuilder), iBuilder->getInt32(0)),
    322310                wordBoundary_then, wordBoundary_cont
    323311                );
     
    331319        // Reaching word boundary but not block boundary.  Update the extender word as requested.
    332320        iBuilder->SetInsertPoint(blockBoundary_else);
    333         loadCurrentExtender();
     321        loadCurrentExtender(iBuilder);
    334322        iBuilder->CreateBr(wordBoundary_cont);
    335323
     
    337325        iBuilder->SetInsertPoint(wordBoundary_cont);
    338326    } else {
    339         BasicBlock * blockBoundary_cont = CreateBasicBlock("block_boundary_cont-" + StateLabels.at(state));
     327        BasicBlock * blockBoundary_cont = iBuilder->CreateBasicBlock("block_boundary_cont-" + StateLabels.at(state));
    340328        iBuilder->CreateUnlikelyCondBr(
    341329                iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sOffset), iBuilder->getInt32(iBuilder->getBitBlockWidth())),
     
    348336
    349337
    350 void LZ4IndexDecoderKernel::generateSkippingBytes(BasicBlock * bb, BasicBlock * exit_block) {
     338void LZ4IndexDecoderKernel::generateSkippingBytes(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    351339    iBuilder->SetInsertPoint(bb);
    352340    printRTDebugMsg("skipping bytes");
     
    355343            iBuilder->getInt32(iBuilder->getBitBlockWidth()), iBuilder->CreateLoad(sOffset)
    356344            );
    357     Value * remainingBytesToSkip = getScalarField("BytesToSkip");
     345    Value * remainingBytesToSkip = iBuilder->getScalarField("BytesToSkip");
    358346    Value * advanceDist = selectMin(iBuilder, remainingBytesInBlock, remainingBytesToSkip);
    359347    remainingBytesToSkip = iBuilder->CreateSub(remainingBytesToSkip, advanceDist);
    360348    incStackVar(iBuilder, sOffset, advanceDist);
    361     setScalarField("BytesToSkip", remainingBytesToSkip);
    362 
    363     generateBoundaryDetection(State::SKIPPING_BYTES, exit_block);
     349    iBuilder->setScalarField("BytesToSkip", remainingBytesToSkip);
     350
     351    generateBoundaryDetection(iBuilder, State::SKIPPING_BYTES, exit_block);
    364352    // Falls through.
    365353}
    366354
    367355
    368 void LZ4IndexDecoderKernel::generateAtBlockSize(BasicBlock * bb, BasicBlock * skippingBytes, BasicBlock * exit_block) {
     356void LZ4IndexDecoderKernel::generateAtBlockSize(const std::unique_ptr<KernelBuilder> &iBuilder, BasicBlock * bb, BasicBlock * skippingBytes, BasicBlock * exit_block) {
    369357    iBuilder->CreateBr(bb);
    370358    iBuilder->SetInsertPoint(bb);
     
    378366
    379367    // A do-while loop.
    380     BasicBlock * loopBody = CreateBasicBlock("blocksize_loop_body");
    381     BasicBlock * loopExit = CreateBasicBlock("blocksize_loop_exit");
     368    BasicBlock * loopBody = iBuilder->CreateBasicBlock("blocksize_loop_body");
     369    BasicBlock * loopExit = iBuilder->CreateBasicBlock("blocksize_loop_exit");
    382370    iBuilder->CreateBr(loopBody);
    383371
    384372    iBuilder->SetInsertPoint(loopBody);
    385     Value * byte = loadRawByte();
     373    Value * byte = loadRawByte(iBuilder);
    386374    Value * newTempLength = iBuilder->CreateAdd(
    387375            iBuilder->CreateShl(iBuilder->CreateLoad(sTempLength), iBuilder->getInt32(8)),
     
    401389
    402390    iBuilder->SetInsertPoint(loopExit);
    403     BasicBlock * blockSizeCompleted_then = CreateBasicBlock("blocksize_completed_then");
    404     BasicBlock * blockSizeCompleted_cont = CreateBasicBlock("blocksize_completed_cont");
     391    BasicBlock * blockSizeCompleted_then = iBuilder->CreateBasicBlock("blocksize_completed_then");
     392    BasicBlock * blockSizeCompleted_cont = iBuilder->CreateBasicBlock("blocksize_completed_cont");
    405393    iBuilder->CreateLikelyCondBr(
    406394            iBuilder->CreateICmpEQ(iBuilder->CreateLoad(sTempCount), iBuilder->getInt32(4)),
     
    413401    Value * blockSize = generateBitswap(iBuilder, iBuilder->CreateLoad(sTempLength));
    414402    Value * currentPos = iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset));
    415     setScalarField("LZ4BlockStart", currentPos);
    416     setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, blockSize));
     403    iBuilder->setScalarField("LZ4BlockStart", currentPos);
     404    iBuilder->setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, blockSize));
    417405    printRTDebugInt("blockSize", blockSize);
    418406
    419     BasicBlock * uncompressedBlock_then = CreateBasicBlock("uncompressed_block_then");
    420     BasicBlock * uncompressedBlock_else = CreateBasicBlock("uncompressed_block_cont");
     407    BasicBlock * uncompressedBlock_then = iBuilder->CreateBasicBlock("uncompressed_block_then");
     408    BasicBlock * uncompressedBlock_else = iBuilder->CreateBasicBlock("uncompressed_block_cont");
    421409    iBuilder->CreateUnlikelyCondBr(
    422410            iBuilder->CreateTrunc(
     
    430418    iBuilder->SetInsertPoint(uncompressedBlock_then);
    431419    Value * realBlockSize = iBuilder->CreateXor(blockSize, iBuilder->getInt32(1L << 31));
    432     setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, realBlockSize));
    433     setScalarField("BytesToSkip", realBlockSize);
    434     setScalarField("LiteralStart", currentPos);
    435     setScalarField("LiteralLength", realBlockSize);
     420    iBuilder->setScalarField("LZ4BlockEnd", iBuilder->CreateAdd(currentPos, realBlockSize));
     421    iBuilder->setScalarField("BytesToSkip", realBlockSize);
     422    iBuilder->setScalarField("LiteralStart", currentPos);
     423    iBuilder->setScalarField("LiteralLength", realBlockSize);
    436424    // No need to set MatchLength/MatchOffset to 0, nor to produce output,
    437425    // because %atBlockChecksum will do so as the last sequence.
     
    453441    // We could be at the boundary no matter the block size is completed or not.
    454442    iBuilder->SetInsertPoint(blockSizeCompleted_cont);
    455     generateBoundaryDetection(State::AT_BLOCK_SIZE, exit_block);
     443    generateBoundaryDetection(iBuilder, State::AT_BLOCK_SIZE, exit_block);
    456444    // Falls through to %at_token.
    457445}
    458446
    459447
    460 void LZ4IndexDecoderKernel::generateAtToken(BasicBlock * bb, BasicBlock * exit_block) {
     448void LZ4IndexDecoderKernel::generateAtToken(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    461449    iBuilder->CreateBr(bb);
    462450    iBuilder->SetInsertPoint(bb);
    463451    printRTDebugMsg("reading token");
    464452
    465     Value * token = loadRawByte();
     453    Value * token = loadRawByte(iBuilder);
    466454    Value * literalLen = iBuilder->CreateZExt(
    467455        iBuilder->CreateLShr(token, iBuilder->getInt8(4)),
     
    474462    incStackVar(iBuilder, sOffset);
    475463    // Prepare extender word for scanning.
    476     loadCurrentExtender();
    477     setExtenderUntilOffset();
     464    loadCurrentExtender(iBuilder);
     465    setExtenderUntilOffset(iBuilder);
    478466    // Store the (partial) match length to be extended later.
    479     setScalarField("MatchLength", matchLen);
     467    iBuilder->setScalarField("MatchLength", matchLen);
    480468    // Use tempLength to accumulate extended lengths (until at_literals).
    481469    iBuilder->CreateStore(literalLen, sTempLength);
    482470    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_LITERAL_LENGTH), sState);
    483471
    484     generateBoundaryDetection(State::AT_TOKEN, exit_block);
     472    generateBoundaryDetection(iBuilder, State::AT_TOKEN, exit_block);
    485473    // Falls through to %extending_literal_length.
    486474}
    487475
    488476
    489 void LZ4IndexDecoderKernel::generateExtendingLiteralLen(BasicBlock * bb, BasicBlock * exit_block) {
     477void LZ4IndexDecoderKernel::generateExtendingLiteralLen(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    490478    iBuilder->CreateBr(bb);
    491479    iBuilder->SetInsertPoint(bb);
    492480    printRTDebugMsg("extending literal len");
    493481
    494     Value * wordOffset = getWordOffset();
    495     Value * blockOffset = getWordStartOffset();
     482    Value * wordOffset = getWordOffset(iBuilder);
     483    Value * blockOffset = getWordStartOffset(iBuilder);
    496484    Value * literalLen = iBuilder->CreateLoad(sTempLength);
    497485    Value * literalExtEnd = iBuilder->CreateTrunc(
     
    512500    Value * lastByte = iBuilder->CreateSelect(literalExtReachBoundary,
    513501            iBuilder->getInt8(0),
    514             loadRawByte(iBuilder->CreateAdd(blockOffset, loadOffset)));
     502            loadRawByte(iBuilder, iBuilder->CreateAdd(blockOffset, loadOffset)));
    515503    Value * literalLenExted = iBuilder->CreateICmpUGE(literalLen, iBuilder->getInt32(0xf));
    516504    literalLen = iBuilder->CreateSelect(literalLenExted,
     
    540528    iBuilder->CreateStore(newState, sState);
    541529
    542     generateBoundaryDetection(State::EXTENDING_LITERAL_LENGTH, exit_block, true);
    543     BasicBlock * cont_block = CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_LITERAL_LENGTH));
     530    generateBoundaryDetection(iBuilder, State::EXTENDING_LITERAL_LENGTH, exit_block, true);
     531    BasicBlock * cont_block = iBuilder->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_LITERAL_LENGTH));
    544532    // Insert point is still in wordBoundary block now.
    545533    // See if there are still more extenders.
     
    551539
    552540
    553 void LZ4IndexDecoderKernel::generateAtLiterals(BasicBlock * bb) {
     541void LZ4IndexDecoderKernel::generateAtLiterals(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb) {
    554542    iBuilder->CreateBr(bb);
    555543    iBuilder->SetInsertPoint(bb);
    556544
    557     setScalarField("LiteralStart", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)));
    558     setScalarField("LiteralLength", iBuilder->CreateLoad(sTempLength));
    559     setScalarField("BytesToSkip", iBuilder->CreateLoad(sTempLength));
     545    iBuilder->setScalarField("LiteralStart", iBuilder->CreateAdd(blockStartPos, iBuilder->CreateLoad(sOffset)));
     546    iBuilder->setScalarField("LiteralLength", iBuilder->CreateLoad(sTempLength));
     547    iBuilder->setScalarField("BytesToSkip", iBuilder->CreateLoad(sTempLength));
    560548    iBuilder->CreateStore(iBuilder->getInt8(State::AT_FIRST_OFFSET), sState);
    561549
     
    565553
    566554
    567 void LZ4IndexDecoderKernel::generateAtFirstOffset(BasicBlock * bb, BasicBlock * exit_block) {
     555void LZ4IndexDecoderKernel::generateAtFirstOffset(const std::unique_ptr<KernelBuilder> &iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    568556    iBuilder->SetInsertPoint(bb);
    569557    printRTDebugMsg("reading first offset");
    570558
    571     Value * byte = iBuilder->CreateZExt(loadRawByte(), iBuilder->getInt32Ty());
     559    Value * byte = iBuilder->CreateZExt(loadRawByte(iBuilder), iBuilder->getInt32Ty());
    572560    // Use tempLength to store partial offset.
    573561    iBuilder->CreateStore(byte, sTempLength);
     
    575563    iBuilder->CreateStore(iBuilder->getInt8(State::AT_SECOND_OFFSET), sState);
    576564
    577     generateBoundaryDetection(State::AT_FIRST_OFFSET, exit_block);
     565    generateBoundaryDetection(iBuilder, State::AT_FIRST_OFFSET, exit_block);
    578566    // Falls through to %at_second_offset.
    579567}
    580568
    581569
    582 void LZ4IndexDecoderKernel::generateAtSecondOffset(BasicBlock * bb, BasicBlock * exit_block) {
     570void LZ4IndexDecoderKernel::generateAtSecondOffset(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    583571    iBuilder->CreateBr(bb);
    584572    iBuilder->SetInsertPoint(bb);
     
    586574
    587575    Value * byte1 = iBuilder->CreateLoad(sTempLength);
    588     Value * byte2 = iBuilder->CreateZExt(loadRawByte(), iBuilder->getInt32Ty());
     576    Value * byte2 = iBuilder->CreateZExt(loadRawByte(iBuilder), iBuilder->getInt32Ty());
    589577    Value * offset = iBuilder->CreateAdd(
    590578            iBuilder->CreateShl(byte2, iBuilder->getInt32(8)),
    591579            byte1
    592580            );
    593     setScalarField("MatchOffset", offset);
     581    iBuilder->setScalarField("MatchOffset", offset);
    594582    incStackVar(iBuilder, sOffset);
    595583    // Prepare extender word and tempLength for extending.
    596     loadCurrentExtender();
    597     setExtenderUntilOffset();
    598     iBuilder->CreateStore(getScalarField("MatchLength"), sTempLength);
     584    loadCurrentExtender(iBuilder);
     585    setExtenderUntilOffset(iBuilder);
     586    iBuilder->CreateStore(iBuilder->getScalarField("MatchLength"), sTempLength);
    599587    iBuilder->CreateStore(iBuilder->getInt8(State::EXTENDING_MATCH_LENGTH), sState);
    600588
    601     generateBoundaryDetection(State::AT_SECOND_OFFSET, exit_block);
     589    generateBoundaryDetection(iBuilder, State::AT_SECOND_OFFSET, exit_block);
    602590    // Falls through to %extending_match_length.
    603591}
    604592
    605593
    606 void LZ4IndexDecoderKernel::generateExtendingMatchLen(BasicBlock * bb, BasicBlock * exit_block) {
     594void LZ4IndexDecoderKernel::generateExtendingMatchLen(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * exit_block) {
    607595    iBuilder->CreateBr(bb);
    608596    iBuilder->SetInsertPoint(bb);
    609597    printRTDebugMsg("extending match length");
    610598    printGlobalPos();
    611     printRTDebugInt("rawbyte", loadRawByte());
     599    printRTDebugInt("rawbyte", loadRawByte(iBuilder));
    612600    printRTDebugInt("extword", iBuilder->CreateLoad(sExtender));
    613601
    614     Value * wordOffset = getWordOffset();
    615     Value * blockOffset = getWordStartOffset();
     602    Value * wordOffset = getWordOffset(iBuilder);
     603    Value * blockOffset = getWordStartOffset(iBuilder);
    616604    Value * matchLen = iBuilder->CreateLoad(sTempLength);
    617605    Value * matchExtEnd = iBuilder->CreateTrunc(
     
    633621    Value * lastByte = iBuilder->CreateSelect(matchExtReachBoundary,
    634622            iBuilder->getInt8(0),
    635             loadRawByte(iBuilder->CreateAdd(blockOffset, loadOffset)));
     623            loadRawByte(iBuilder, iBuilder->CreateAdd(blockOffset, loadOffset)));
    636624    Value * matchLenExted = iBuilder->CreateICmpUGE(matchLen, iBuilder->getInt32(0xf));
    637625    matchLen = iBuilder->CreateSelect(matchLenExted,
     
    657645
    658646    Value * unfinished = iBuilder->CreateAnd(matchExtReachBoundary, matchLenExted);
    659     BasicBlock * output_then = CreateBasicBlock("output_then");
    660     BasicBlock * output_cont = CreateBasicBlock("output_cont");
     647    BasicBlock * output_then = iBuilder->CreateBasicBlock("output_then");
     648    BasicBlock * output_cont = iBuilder->CreateBasicBlock("output_cont");
    661649    iBuilder->CreateLikelyCondBr(
    662650            iBuilder->CreateNot(unfinished),
     
    666654    iBuilder->CreateStore(iBuilder->getInt8(State::AT_TOKEN), sState);
    667655    matchLen = iBuilder->CreateAdd(matchLen, iBuilder->getInt32(4));    // Add the constant at the end.
    668     setScalarField("MatchLength", matchLen);
    669     generateProduceOutput();
     656    iBuilder->setScalarField("MatchLength", matchLen);
     657    generateProduceOutput(iBuilder);
    670658    iBuilder->CreateBr(output_cont);
    671659
    672660    iBuilder->SetInsertPoint(output_cont);
    673     generateBoundaryDetection(State::EXTENDING_MATCH_LENGTH, exit_block, true);
    674     BasicBlock * cont_block = CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_MATCH_LENGTH));
     661    generateBoundaryDetection(iBuilder, State::EXTENDING_MATCH_LENGTH, exit_block, true);
     662    BasicBlock * cont_block = iBuilder->CreateBasicBlock("finished_" + StateLabels.at(State::EXTENDING_MATCH_LENGTH));
    675663    // Insert point is still in wordBoundary block now.
    676664    // See if there are still more extenders.
     
    681669
    682670
    683 void LZ4IndexDecoderKernel::generateAtBlockChecksum(BasicBlock * bb, BasicBlock * skippingBytes) {
     671void LZ4IndexDecoderKernel::generateAtBlockChecksum(const std::unique_ptr<KernelBuilder> & iBuilder, BasicBlock * bb, BasicBlock * skippingBytes) {
    684672    // No branch here as we have made a conditional branch outside.
    685673    iBuilder->SetInsertPoint(bb);
     
    687675
    688676    // Produce the partial output (fill matchIndexes with 0).
    689     setScalarField("MatchOffset", iBuilder->getInt32(0));
    690     setScalarField("MatchLength", iBuilder->getInt32(0));
    691     generateProduceOutput();
    692 
    693     BasicBlock * hasChecksum_then = CreateBasicBlock("has_checksum_then");
    694     BasicBlock * hasChecksum_cont = CreateBasicBlock("has_checksum_cont");
     677    iBuilder->setScalarField("MatchOffset", iBuilder->getInt32(0));
     678    iBuilder->setScalarField("MatchLength", iBuilder->getInt32(0));
     679    generateProduceOutput(iBuilder);
     680
     681    BasicBlock * hasChecksum_then = iBuilder->CreateBasicBlock("has_checksum_then");
     682    BasicBlock * hasChecksum_cont = iBuilder->CreateBasicBlock("has_checksum_cont");
    695683
    696684    iBuilder->CreateStore(iBuilder->getInt8(State::AT_BLOCK_SIZE), sState);
    697     iBuilder->CreateCondBr(getScalarField("hasBlockChecksum"), hasChecksum_then, hasChecksum_cont);
     685    iBuilder->CreateCondBr(iBuilder->getScalarField("hasBlockChecksum"), hasChecksum_then, hasChecksum_cont);
    698686
    699687    iBuilder->SetInsertPoint(hasChecksum_then);
    700     setScalarField("BytesToSkip", iBuilder->getInt32(4));
     688    iBuilder->setScalarField("BytesToSkip", iBuilder->getInt32(4));
    701689    iBuilder->CreateBr(skippingBytes);
    702690    // Boundary detection will be done in skipping_bytes.
  • icGREP/icgrep-devel/icgrep/kernels/lz4_index_decoder.h

    r5436 r5440  
    2626    LZ4IndexDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    2727protected:
    28     void generateDoBlockMethod() override;
     28    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    2929private:
    30     bool useIndirectBr() const override;
    3130
    3231    enum State : unsigned char {
     
    6867
    6968    // Helper methods.
    70     llvm::Value * getWordOffset();
    71     llvm::Value * getWordStartOffset();
    72     llvm::Value * loadRawByte(llvm::Value * offset);
    73     void setExtenderUntilOffset();
    74     void loadCurrentExtender();
     69    llvm::Value * getWordOffset(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     70    llvm::Value * getWordStartOffset(const std::unique_pt