Changeset 5411


Ignore:
Timestamp:
Apr 18, 2017, 12:51:26 PM (2 years ago)
Author:
nmedfort
Message:

Potential bug fix for 32-bit. Modified MRemap to check for Linux OS support. Added MMapAdvise to CBuilder.

Location:
icGREP/icgrep-devel/icgrep
Files:
15 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5403 r5411  
    1717#include <sys/mman.h>
    1818#include <errno.h>
     19#include <llvm/ADT/Triple.h>
    1920
    2021using namespace llvm;
     
    8081void CBuilder::CallPrintInt(const std::string & name, Value * const value) {
    8182    Constant * printRegister = mMod->getFunction("PrintInt");
     83    IntegerType * int64Ty = getInt64Ty();
    8284    if (LLVM_UNLIKELY(printRegister == nullptr)) {
    83         FunctionType *FT = FunctionType::get(getVoidTy(), { PointerType::get(getInt8Ty(), 0), getSizeTy() }, false);
     85        FunctionType *FT = FunctionType::get(getVoidTy(), { getInt8PtrTy(), int64Ty }, false);
    8486        Function * function = Function::Create(FT, Function::InternalLinkage, "PrintInt", mMod);
    8587        auto arg = function->arg_begin();
     
    102104    Value * num = nullptr;
    103105    if (value->getType()->isPointerTy()) {
    104         num = CreatePtrToInt(value, getSizeTy());
     106        num = CreatePtrToInt(value, int64Ty);
    105107    } else {
    106         num = CreateZExtOrBitCast(value, getSizeTy());
     108        num = CreateZExtOrBitCast(value, int64Ty);
    107109    }
    108110    assert (num->getType()->isIntegerTy());
     
    169171}
    170172
    171 #ifdef __APPLE__
    172 #define MAP_ANONYMOUS MAP_ANON
    173 #endif
    174 
    175173Value * CBuilder::CreateAnonymousMMap(Value * size) {
    176174    PointerType * const voidPtrTy = getVoidPtrTy();
     
    179177    size = CreateZExtOrTrunc(size, sizeTy);
    180178    ConstantInt * const prot =  ConstantInt::get(intTy, PROT_READ | PROT_WRITE);
    181     ConstantInt * const flags =  ConstantInt::get(intTy, MAP_PRIVATE | MAP_ANONYMOUS);
     179    ConstantInt * const flags =  ConstantInt::get(intTy, MAP_PRIVATE | MAP_ANON);
    182180    ConstantInt * const fd =  ConstantInt::get(intTy, -1);
    183181    Constant * const offset = ConstantInt::get(sizeTy, 0);
    184     return CreateMMap(Constant::getNullValue(voidPtrTy), size, prot, flags, fd, offset);
     182    return CreateMMap(ConstantPointerNull::getNullValue(voidPtrTy), size, prot, flags, fd, offset);
    185183}
    186184
     
    193191    ConstantInt * const flags =  ConstantInt::get(intTy, MAP_PRIVATE);
    194192    Constant * const offset = ConstantInt::get(sizeTy, 0);
    195     return CreateMMap(Constant::getNullValue(voidPtrTy), size, prot, flags, fd, offset);
     193    return CreateMMap(ConstantPointerNull::getNullValue(voidPtrTy), size, prot, flags, fd, offset);
    196194}
    197195
     
    212210}
    213211
     212/*
     213    MADV_NORMAL
     214        No special treatment. This is the default.
     215    MADV_RANDOM
     216        Expect page references in random order. (Hence, read ahead may be less useful than normally.)
     217    MADV_SEQUENTIAL
     218        Expect page references in sequential order. (Hence, pages in the given range can be aggressively read ahead, and may be freed
     219        soon after they are accessed.)
     220    MADV_WILLNEED
     221        Expect access in the near future. (Hence, it might be a good idea to read some pages ahead.)
     222    MADV_DONTNEED
     223        Do not expect access in the near future. (For the time being, the application is finished with the given range, so the kernel
     224        can free resources associated with it.) Subsequent accesses of pages in this range will succeed, but will result either in
     225        reloading of the memory contents from the underlying mapped file (see mmap(2)) or zero-fill-on-demand pages for mappings
     226        without an underlying file.
     227*/
     228
     229Value * CBuilder::CreateMMapAdvise(Value * addr, Value * length, std::initializer_list<MADV> advice) {
     230    Triple T(mMod->getTargetTriple());
     231    Value * result = nullptr;
     232    if (T.isOSLinux()) {
     233        DataLayout DL(mMod);
     234        IntegerType * const intTy = getIntPtrTy(DL);
     235        IntegerType * const sizeTy = getSizeTy();
     236        PointerType * const voidPtrTy = getVoidPtrTy();
     237        Function * MAdviseFunc = mMod->getFunction("madvise");
     238        if (LLVM_UNLIKELY(MAdviseFunc == nullptr)) {
     239            FunctionType * fty = FunctionType::get(intTy, {voidPtrTy, sizeTy, intTy}, false);
     240            MAdviseFunc = Function::Create(fty, Function::ExternalLinkage, "madvise", mMod);
     241        }
     242        addr = CreatePointerCast(addr, voidPtrTy);
     243        length = CreateZExtOrTrunc(length, sizeTy);
     244        int adviceFlags = 0;
     245        for (const MADV adv : advice) {
     246            switch (adv) {
     247                case MADV::NORMAL: adviceFlags |= MADV_NORMAL; break;
     248                case MADV::RANDOM: adviceFlags |= MADV_RANDOM; break;
     249                case MADV::SEQUENTIAL: adviceFlags |= MADV_SEQUENTIAL; break;
     250                case MADV::DONTNEED: adviceFlags |= MADV_DONTNEED; break;
     251                case MADV::WILLNEED: adviceFlags |= MADV_WILLNEED; break;
     252//                case MADV::REMOVE: adviceFlags |= MADV_REMOVE; break;
     253//                case MADV::DONTFORK: adviceFlags |= MADV_DONTFORK; break;
     254//                case MADV::DOFORK: adviceFlags |= MADV_DOFORK; break;
     255//                case MADV::HWPOISON: adviceFlags |= MADV_HWPOISON; break;
     256//                case MADV::MERGEABLE: adviceFlags |= MADV_MERGEABLE; break;
     257//                case MADV::UNMERGEABLE: adviceFlags |= MADV_UNMERGEABLE; break;
     258//                case MADV::HUGEPAGE: adviceFlags |= MADV_HUGEPAGE; break;
     259//                case MADV::NOHUGEPAGE: adviceFlags |= MADV_NOHUGEPAGE; break;
     260//                case MADV::DONTDUMP: adviceFlags |= MADV_DONTDUMP; break;
     261//                case MADV::DODUMP: adviceFlags |= MADV_DODUMP; break;
     262            }
     263        }
     264        result = CreateCall(MAdviseFunc, {addr, length, ConstantInt::get(intTy, adviceFlags)});
     265        if (codegen::EnableAsserts) {
     266            CreateAssert(CreateICmpEQ(result, ConstantInt::getNullValue(result->getType())), "CreateMMapAdvise: failed");
     267        }
     268    }
     269    return result;
     270}
     271
    214272Value * CBuilder::CheckMMapSuccess(Value * const addr) {
    215273    DataLayout DL(mMod);
    216     IntegerType * const ty = getIntPtrTy(DL);
    217     return CreateICmpNE(CreatePtrToInt(addr, ty), ConstantInt::getAllOnesValue(ty)); // MAP_FAILED = -1
    218 }
    219 
    220 #ifndef __APPLE__
    221 Value * CBuilder::CreateMRemap(Value * addr, Value * oldSize, Value * newSize, const bool mayMove) {
    222     DataLayout DL(mMod);
    223     PointerType * const voidPtrTy = getVoidPtrTy();
    224274    IntegerType * const intTy = getIntPtrTy(DL);
    225     IntegerType * const sizeTy = getSizeTy();
    226     Function * fMRemap = mMod->getFunction("mremap");
    227     if (LLVM_UNLIKELY(fMRemap == nullptr)) {
    228         FunctionType * fty = FunctionType::get(voidPtrTy, {voidPtrTy, sizeTy, sizeTy, intTy}, false);
    229         fMRemap = Function::Create(fty, Function::ExternalLinkage, "mremap", mMod);
    230     }   
    231     addr = CreatePointerCast(addr, voidPtrTy);
    232     oldSize = CreateZExtOrTrunc(oldSize, sizeTy);
    233     newSize = CreateZExtOrTrunc(newSize, sizeTy);
    234     ConstantInt * const flags = ConstantInt::get(intTy, mayMove ? MREMAP_MAYMOVE : 0);
    235     Value * ptr = CreateCall(fMRemap, {addr, oldSize, newSize, flags});
    236     if (codegen::EnableAsserts) {
    237         CreateAssert(CheckMMapSuccess(ptr), "CreateMRemap: mremap failed to allocate memory");
     275    return CreateICmpNE(CreatePtrToInt(addr, intTy), ConstantInt::getAllOnesValue(intTy)); // MAP_FAILED = -1
     276}
     277
     278Value * CBuilder::CreateMRemap(Value * addr, Value * oldSize, Value * newSize) {
     279    Triple T(mMod->getTargetTriple());
     280    Value * ptr = nullptr;
     281    if (T.isOSLinux()) {
     282        DataLayout DL(mMod);
     283        PointerType * const voidPtrTy = getVoidPtrTy();
     284        IntegerType * const sizeTy = getSizeTy();
     285        IntegerType * const intTy = getIntPtrTy(DL);
     286        Function * fMRemap = mMod->getFunction("mremap");
     287        if (LLVM_UNLIKELY(fMRemap == nullptr)) {
     288            FunctionType * fty = FunctionType::get(voidPtrTy, {voidPtrTy, sizeTy, sizeTy, intTy}, false);
     289            fMRemap = Function::Create(fty, Function::ExternalLinkage, "mremap", mMod);
     290        }
     291        addr = CreatePointerCast(addr, voidPtrTy);
     292        oldSize = CreateZExtOrTrunc(oldSize, sizeTy);
     293        newSize = CreateZExtOrTrunc(newSize, sizeTy);
     294        ConstantInt * const flags = ConstantInt::get(intTy, MREMAP_MAYMOVE);
     295        ptr = CreateCall(fMRemap, {addr, oldSize, newSize, flags});
     296        if (codegen::EnableAsserts) {
     297            CreateAssert(CheckMMapSuccess(ptr), "CreateMRemap: mremap failed to allocate memory");
     298        }
     299    } else { // no OS mremap support
     300        ptr = CreateAnonymousMMap(newSize);
     301        CreateMemCpy(ptr, addr, oldSize, getpagesize());
     302        CreateMUnmap(addr, oldSize);
    238303    }
    239304    return ptr;
    240305}
    241 #endif
    242306
    243307Value * CBuilder::CreateMUnmap(Value * addr, Value * size) {
    244     DataLayout DL(mMod);
    245308    IntegerType * const sizeTy = getSizeTy();
    246309    PointerType * const voidPtrTy = getVoidPtrTy();
    247310    Function * fMUnmap = mMod->getFunction("munmap");
    248311    if (LLVM_UNLIKELY(fMUnmap == nullptr)) {
     312        DataLayout DL(mMod);
    249313        IntegerType * const intTy = getIntPtrTy(DL);
    250314        FunctionType * fty = FunctionType::get(intTy, {voidPtrTy, sizeTy}, false);
    251315        fMUnmap = Function::Create(fty, Function::ExternalLinkage, "munmap", mMod);
    252316    }
     317    if (codegen::EnableAsserts) {
     318        Value * const pageOffset = CreateURem(CreatePtrToInt(addr, sizeTy), getSize(getpagesize()));
     319        CreateAssert(CreateICmpEQ(pageOffset, getSize(0)), "CreateMUnmap: addr must be a multiple of the page size");
     320    }
    253321    addr = CreatePointerCast(addr, voidPtrTy);
    254322    size = CreateZExtOrTrunc(size, sizeTy);
    255     return CreateCall(fMUnmap, {addr, size});
     323    CallInst * result = CreateCall(fMUnmap, {addr, size});
     324    if (codegen::EnableAsserts) {
     325        CreateAssert(CreateICmpEQ(result, ConstantInt::getNullValue(result->getType())), "CreateMUnmap: failed");
     326    }
     327    return result;
    256328}
    257329
     
    327399
    328400PointerType * CBuilder::getVoidPtrTy() const {
    329     return TypeBuilder<void *, false>::get(getContext());
     401    return TypeBuilder<void *, true>::get(getContext());
    330402}
    331403
     
    364436Value * CBuilder::CreateFReadCall(Value * ptr, Value * size, Value * nitems, Value * stream) {
    365437    Function * fReadFunc = mMod->getFunction("fread");
     438    PointerType * const voidPtrTy = getVoidPtrTy();
    366439    if (fReadFunc == nullptr) {
    367         FunctionType * fty = FunctionType::get(getSizeTy(), {getVoidPtrTy(), getSizeTy(), getSizeTy(), getFILEptrTy()}, false);
     440        IntegerType * const sizeTy = getSizeTy();
     441        FunctionType * fty = FunctionType::get(sizeTy, {voidPtrTy, sizeTy, sizeTy, getFILEptrTy()}, false);
    368442        fReadFunc = Function::Create(fty, Function::ExternalLinkage, "fread", mMod);
    369443        fReadFunc->setCallingConv(CallingConv::C);
    370444    }
     445    ptr = CreatePointerCast(ptr, voidPtrTy);
    371446    return CreateCall(fReadFunc, {ptr, size, nitems, stream});
    372447}
     
    374449Value * CBuilder::CreateFWriteCall(Value * ptr, Value * size, Value * nitems, Value * stream) {
    375450    Function * fWriteFunc = mMod->getFunction("fwrite");
     451    PointerType * const voidPtrTy = getVoidPtrTy();
    376452    if (fWriteFunc == nullptr) {
    377         FunctionType * fty = FunctionType::get(getSizeTy(), {getVoidPtrTy(), getSizeTy(), getSizeTy(), getFILEptrTy()}, false);
     453        IntegerType * const sizeTy = getSizeTy();
     454        FunctionType * fty = FunctionType::get(sizeTy, {voidPtrTy, sizeTy, sizeTy, getFILEptrTy()}, false);
    378455        fWriteFunc = Function::Create(fty, Function::ExternalLinkage, "fwrite", mMod);
    379456        fWriteFunc->setCallingConv(CallingConv::C);
    380457    }
     458    ptr = CreatePointerCast(ptr, voidPtrTy);
    381459    return CreateCall(fWriteFunc, {ptr, size, nitems, stream});
    382460}
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5398 r5411  
    8181    llvm::Value * CreateFileSourceMMap(llvm::Value * fd, llvm::Value * size);
    8282
     83    enum class MADV {
     84        NORMAL
     85        , RANDOM
     86        , SEQUENTIAL
     87        , WILLNEED
     88        , DONTNEED
     89//        , REMOVE
     90//        , DONTFORK
     91//        , DOFORK
     92//        , HWPOISON
     93//        , MERGEABLE
     94//        , UNMERGEABLE
     95//        , HUGEPAGE
     96//        , NOHUGEPAGE
     97//        , DONTDUMP
     98//        , DODUMP
     99    };
     100
     101    llvm::Value * CreateMMapAdvise(llvm::Value * addr, llvm::Value * length, MADV advice) {
     102        return CreateMMapAdvise(addr, length, { advice });
     103    }
     104
     105    llvm::Value * CreateMMapAdvise(llvm::Value * addr, llvm::Value * length, std::initializer_list<MADV> advice);
     106
    83107    llvm::Value * CreateMMap(llvm::Value * const addr, llvm::Value * size, llvm::Value * const prot, llvm::Value * const flags, llvm::Value * const fd, llvm::Value * const offset);
    84108
    85109    llvm::Value * CheckMMapSuccess(llvm::Value * const addr);
    86110
    87     llvm::Value * CreateMRemap(llvm::Value * addr, llvm::Value * oldSize, llvm::Value * newSize, const bool mayMove = true);
     111    llvm::Value * CreateMRemap(llvm::Value * addr, llvm::Value * oldSize, llvm::Value * newSize);
    88112
    89113    llvm::Value * CreateMUnmap(llvm::Value * addr, llvm::Value * size);
    90 
    91 
    92114
    93115    //  Posix thread (pthread.h) functions.
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5410 r5411  
    301301    assert (line_end <= filesize);
    302302
     303  //  errs().write_hex((size_t)buffer) << " : " << lineNum << " (" << line_start << ", " << line_end << ", " << filesize << ")\n";
     304
    303305    #ifdef CUDA_ENABLED
    304306    if (codegen::NVPTX){
     
    532534        pxDriver.generatePipelineIR();
    533535
    534         iBuilder->CreateRet(icgrepK.createGetAccumulatorCall(icgrepK.getInstance(), "matchedLineCount"));
     536        iBuilder->CreateRet(icgrepK.createGetAccumulatorCall("matchedLineCount"));
    535537
    536538        pxDriver.linkAndFinalize();
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5408 r5411  
    1515
    1616static const auto INIT_SUFFIX = "_Init";
     17
     18static const auto TERMINATE_SUFFIX = "_Terminate";
    1719
    1820static const auto DO_SEGMENT_SUFFIX = "_DoSegment";
     
    99101    for (auto binding : mStreamSetOutputs) {
    100102        args->setName(binding.name + "ConsumerLogicalSegments");       
    101 //        args->addAttr(Attribute::NoCapture);
    102 //        args->addAttr(Attribute::ReadOnly);
    103103        ++args;
    104104    }
     
    119119        (++args)->setName(input.name + "AvailableItems");
    120120    }
     121
     122    // Create the terminate function prototype
     123    FunctionType * terminateType = FunctionType::get(iBuilder->getVoidTy(), {selfType}, false);
     124    Function * terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, client);
     125    terminateFunc->setCallingConv(CallingConv::C);
     126    terminateFunc->setDoesNotThrow();
     127    terminateFunc->setDoesNotCapture(1);
     128    args = terminateFunc->arg_begin();
     129    args->setName("self");
    121130
    122131    /// INVESTIGATE: replace the accumulator methods with a single Exit method that handles any clean up and returns
     
    168177    return f;
    169178}
     179
     180Function * KernelInterface::getTerminateFunction() const {
     181    const auto name = getName() + TERMINATE_SUFFIX;
     182    Function * f = iBuilder->getModule()->getFunction(name);
     183    if (LLVM_UNLIKELY(f == nullptr)) {
     184        llvm::report_fatal_error("Cannot find " + name);
     185    }
     186    return f;
     187}
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5408 r5411  
    104104    virtual void initializeInstance() = 0;
    105105
     106    virtual void terminateInstance() = 0;
     107
    106108    void setInitialArguments(std::vector<llvm::Value *> args);
    107109
     
    139141
    140142    llvm::Function * getAccumulatorFunction(const std::string & accumName) const;
     143
     144    llvm::Function * getTerminateFunction() const;
    141145
    142146protected:
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5408 r5411  
    187187
    188188    prepareKernel();
    189     return new Module(cacheName.str(), iBuilder->getContext());
     189
     190    Module * const m = new Module(cacheName.str(), iBuilder->getContext());
     191    m->setTargetTriple(iBuilder->getModule()->getTargetTriple());
     192    return m;
    190193}
    191194
     
    204207
    205208void KernelBuilder::generateKernel() {
    206     // If the module id is cannot uniquely identify this kernel, "generateKernelSignature()" will have already
     209    // If the module id cannot uniquely identify this kernel, "generateKernelSignature()" will have already
    207210    // generated the unoptimized IR.
    208211    if (!mIsGenerated) {
     212        auto saveInstance = getInstance();
    209213        auto savePoint = iBuilder->saveIP();
    210214        addKernelDeclarations(iBuilder->getModule());
    211215        callGenerateInitMethod();
    212         callGenerateDoSegmentMethod();
     216        callGenerateDoSegmentMethod();       
    213217        // Implement the accumulator get functions
    214218        for (auto binding : mScalarOutputs) {
     
    220224            iBuilder->CreateRet(retVal);
    221225        }
     226        callGenerateTerminateMethod();
    222227        iBuilder->restoreIP(savePoint);
     228        setInstance(saveInstance);
    223229        mIsGenerated = true;       
    224230    }
     
    252258    }
    253259    generateInitMethod();
     260    iBuilder->CreateRetVoid();
     261}
     262
     263void KernelBuilder::callGenerateTerminateMethod() {
     264    mCurrentMethod = getTerminateFunction();
     265    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
     266    auto args = mCurrentMethod->arg_begin();
     267    setInstance(&*(args++));
     268    generateTerminateMethod(); // may be overridden by the KernelBuilder subtype
    254269    iBuilder->CreateRetVoid();
    255270}
     
    502517}
    503518
    504 CallInst * KernelBuilder::createGetAccumulatorCall(Value * self, const std::string & accumName) const {
    505     return iBuilder->CreateCall(getAccumulatorFunction(accumName), {self});
     519CallInst * KernelBuilder::createGetAccumulatorCall(const std::string & accumName) const {
     520    return iBuilder->CreateCall(getAccumulatorFunction(accumName), { getInstance() });
    506521}
    507522
     
    519534
    520535void KernelBuilder::initializeInstance() {
     536
     537
    521538    if (LLVM_UNLIKELY(getInstance() == nullptr)) {
    522539        report_fatal_error("Cannot initialize " + getName() + " before calling createInstance()");
     
    558575    PointerType * const sizePtrPtrTy = sizePtrTy->getPointerTo();
    559576    StructType * const consumerTy = StructType::get(sizeTy, sizePtrPtrTy, nullptr);
    560     Constant * const sizeOfSizePtrTy = ConstantExpr::getSizeOf(sizePtrTy);
    561 
    562577    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
    563578        const auto & consumers = mStreamSetOutputBuffers[i]->getConsumers();
     579        const auto n = consumers.size();
    564580        AllocaInst * const outputConsumers = iBuilder->CreateAlloca(consumerTy);
    565         Value * const numPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    566         const auto n = consumers.size();
    567         const auto consumerCount = iBuilder->getSize(n);
    568         iBuilder->CreateStore(consumerCount, numPtr);
    569         Value * const consumerPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    570         Value * const segNoPtrs = iBuilder->CreatePointerCast(iBuilder->CreateMalloc(ConstantExpr::getMul(consumerCount, sizeOfSizePtrTy)), sizePtrPtrTy);
    571         iBuilder->CreateStore(segNoPtrs, consumerPtr);
     581        Value * const consumerSegNoArray = iBuilder->CreateAlloca(ArrayType::get(sizePtrTy, n));
    572582        for (unsigned i = 0; i < n; ++i) {
    573583            KernelBuilder * const consumer = consumers[i];
    574584            assert (consumer->getInstance());
    575             iBuilder->CreateStore(consumer->getScalarFieldPtr(consumer->getInstance(), LOGICAL_SEGMENT_NO_SCALAR), iBuilder->CreateGEP(segNoPtrs, iBuilder->getSize(i)));
    576         }
     585            Value * const segNo = consumer->getScalarFieldPtr(consumer->getInstance(), LOGICAL_SEGMENT_NO_SCALAR);
     586            iBuilder->CreateStore(segNo, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
     587        }
     588        Value * const consumerCountPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     589        iBuilder->CreateStore(iBuilder->getSize(n), consumerCountPtr);
     590        Value * const consumerSegNoArrayPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     591        iBuilder->CreateStore(iBuilder->CreatePointerCast(consumerSegNoArray, sizePtrPtrTy), consumerSegNoArrayPtr);
    577592        args.push_back(outputConsumers);
    578593    }
    579594    iBuilder->CreateCall(getInitFunction(), args);
     595}
     596
     597void KernelBuilder::terminateInstance() {
     598    iBuilder->CreateCall(getTerminateFunction(), { getInstance() });
    580599}
    581600
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5408 r5411  
    8080    void initializeInstance() final;
    8181
     82    void terminateInstance() final;
     83
    8284    llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const final;
    8385
     
    121123        iBuilder->CreateStore(value, getScalarFieldPtr(index));
    122124    }
    123 
    124125
    125126    // Synchronization actions for executing a kernel for a particular logical segment.
     
    163164    llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args) const;
    164165
    165     llvm::CallInst * createGetAccumulatorCall(llvm::Value * self, const std::string & accumName) const;
     166    llvm::CallInst * createGetAccumulatorCall(const std::string & accumName) const;
    166167
    167168    virtual ~KernelBuilder() = 0;
     
    201202    virtual void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) = 0;
    202203
     204    virtual void generateTerminateMethod() { }
     205
    203206    // Add an additional scalar field to the KernelState struct.
    204207    // Must occur before any call to addKernelDeclarations or createKernelModule.
     
    284287    void callGenerateDoSegmentMethod();
    285288
     289    void callGenerateTerminateMethod();
    286290
    287291private:
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5408 r5411  
    2222using FlatSet = boost::container::flat_set<Value>;
    2323
    24 Function * makeThreadFunction(const std::string & name, Module * const m) {
    25     LLVMContext & C = m->getContext();
    26     Type * const voidTy = Type::getVoidTy(C);
    27     PointerType * const int8PtrTy = Type::getInt8PtrTy(C);
    28     Function * const f = Function::Create(FunctionType::get(voidTy, {int8PtrTy}, false), Function::InternalLinkage, name, m);
     24Function * makeThreadFunction(IDISA::IDISA_Builder * const b, const std::string & name) {
     25    Function * const f = Function::Create(FunctionType::get(b->getVoidTy(), {b->getVoidPtrTy()}, false), Function::InternalLinkage, name, b->getModule());
    2926    f->setCallingConv(CallingConv::C);
    3027    f->arg_begin()->setName("input");
     
    4744    IntegerType * const sizeTy = iBuilder->getSizeTy();
    4845    PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
    49     PointerType * const int8PtrTy = iBuilder->getInt8PtrTy();
    5046    const unsigned threads = codegen::ThreadNum;
    5147    Constant * nullVoidPtrVal = ConstantPointerNull::getNullValue(voidPtrTy);
    5248
    53     assert (!kernels.empty());
    54 
    5549    std::vector<Type *> structTypes;
    5650
     
    6357    StructType * const threadStructType = StructType::get(sharedStructType->getPointerTo(), sizeTy, nullptr);
    6458
    65     Function * const threadFunc = makeThreadFunction("segment", m);
     59    Function * const threadFunc = makeThreadFunction(iBuilder, "segment");
    6660
    6761    // -------------------------------------------------------------------------------------------------------------------------
     
    130124        iBuilder->SetInsertPoint(segmentLoopBody);
    131125        const auto & inputs = kernel->getStreamInputs();
    132         const auto & outputs = kernel->getStreamOutputs();
    133126        std::vector<Value *> args = {kernel->getInstance(), doFinal};
    134127        for (unsigned i = 0; i < inputs.size(); ++i) {
     
    140133        }
    141134
    142         CallInst * ci = kernel->createDoSegmentCall(args);
    143         // TODO: investigate whether this actually inlines the function call correctly despite being in a seperate module.
    144         ci->addAttribute(AttributeSet::FunctionIndex, Attribute::AlwaysInline);
    145 
     135        kernel->createDoSegmentCall(args);
    146136        if (!kernel->hasNoTerminateAttribute()) {
    147137            doFinal = iBuilder->CreateOr(doFinal, kernel->getTerminationSignal());
    148138        }
     139
     140        const auto & outputs = kernel->getStreamOutputs();
    149141        for (unsigned i = 0; i < outputs.size(); ++i) {
    150142            Value * const produced = kernel->getProducedItemCount(outputs[i].name, doFinal);
     
    203195    }
    204196
    205     AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
     197    AllocaInst * const status = iBuilder->CreateAlloca(voidPtrTy);
    206198    for (unsigned i = 0; i < threads; ++i) {
    207199        Value * threadId = iBuilder->CreateLoad(threadIdPtr[i]);
     
    219211    IntegerType * const sizeTy = iBuilder->getSizeTy();
    220212    PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
    221     PointerType * const int8PtrTy = iBuilder->getInt8PtrTy();
    222213    ConstantInt * bufferSegments = ConstantInt::get(sizeTy, codegen::BufferSegments - 1);
    223214    ConstantInt * segmentItems = ConstantInt::get(sizeTy, codegen::SegmentSize * iBuilder->getBitBlockWidth());
     
    295286        const auto & inputs = kernel->getStreamInputs();
    296287
    297         Function * const threadFunc = makeThreadFunction("ppt:" + kernel->getName(), m);
     288        Function * const threadFunc = makeThreadFunction(iBuilder, "ppt:" + kernel->getName());
    298289
    299290         // Create the basic blocks for the thread function.
     
    394385    }
    395386
    396     AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
     387    AllocaInst * const status = iBuilder->CreateAlloca(voidPtrTy);
    397388    for (unsigned i = 0; i < n; ++i) {
    398389        Value * threadId = iBuilder->CreateLoad(threadIdPtr[i]);
     
    409400    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    410401    Function * main = entryBlock->getParent();
    411 
    412     assert (!kernels.empty());
    413402
    414403    // Create the basic blocks for the loop.
     
    432421            args.push_back(f->second);
    433422        }
     423        Value * const segNo = kernel->acquireLogicalSegmentNo();
    434424        kernel->createDoSegmentCall(args);
    435425        if (!kernel->hasNoTerminateAttribute()) {
     
    444434        }
    445435
    446         Value * const segNo = kernel->acquireLogicalSegmentNo();
    447436        kernel->releaseLogicalSegmentNo(iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
    448437    }
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5398 r5411  
    4545    BasicBlock * const scanWordExit = CreateBasicBlock("ScanWordExit");
    4646    IntegerType * const sizeTy = iBuilder->getSizeTy();
    47     PointerType * const codeUnitTy = iBuilder->getIntNTy(mCodeUnitWidth)->getPointerTo();
    4847    const unsigned fieldCount = iBuilder->getBitBlockWidth() / sizeTy->getBitWidth();
    4948    VectorType * const scanwordVectorType =  VectorType::get(sizeTy, fieldCount);
     
    5251    Value * const lastRecordStart = getProcessedItemCount("InputStream");
    5352    Value * const lastRecordNum = getScalarField("LineNum");
    54     Value * const inputStream = iBuilder->CreatePointerCast(getRawInputPointer("InputStream", iBuilder->getInt32(0), iBuilder->getInt32(0)), codeUnitTy);
    5553
    5654    Value * const matches = iBuilder->CreateBitCast(loadInputStreamBlock("matchResult", iBuilder->getInt32(0)), scanwordVectorType);
     
    127125            matchRecordStart->addIncoming(priorRecordStart, prior_breaks_block);
    128126            phiRecordStart->addIncoming(matchRecordStart, loop_final_block);
    129 
    130127            Value * matchRecordEnd = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateCountForwardZeroes(phiMatchWord));
    131             Function * const matcher = m->getFunction("matcher");
    132             assert (matcher);
    133             switch (mGrepType) {
    134                 case GrepType::Normal:
    135                     iBuilder->CreateCall(matcher, {matchRecordNum, matchRecordStart, matchRecordEnd, inputStream, getBufferedSize("InputStream"), getScalarField("FileIdx")});
    136                     break;
    137                 case GrepType::NameExpression:
    138                 case GrepType::PropertyValue:
    139                     iBuilder->CreateCall(matcher, {matchRecordNum, matchRecordStart, matchRecordEnd, inputStream});
    140                     break;
    141                 default: break;
     128
     129            Function * const matcher = m->getFunction("matcher"); assert (matcher);
     130            auto args = matcher->arg_begin();
     131            Value * const mrn = iBuilder->CreateZExtOrTrunc(matchRecordNum, args->getType());
     132            Value * const mrs = iBuilder->CreateZExtOrTrunc(matchRecordStart, (++args)->getType());
     133            Value * const mre = iBuilder->CreateZExtOrTrunc(matchRecordEnd, (++args)->getType());
     134            Value * const inputStream = getRawInputPointer("InputStream", iBuilder->getInt32(0), iBuilder->getInt32(0));
     135            Value * const is = iBuilder->CreatePointerCast(inputStream, (++args)->getType());
     136            if (mGrepType == GrepType::Normal) {
     137                Value * const sz = iBuilder->CreateZExtOrTrunc(getBufferedSize("InputStream"), (++args)->getType());
     138                Value * const fi = iBuilder->CreateZExtOrTrunc(getScalarField("FileIdx"), (++args)->getType());
     139                iBuilder->CreateCall(matcher, {mrn, mrs, mre, is, sz, fi});
     140            } else {
     141                iBuilder->CreateCall(matcher, {mrn, mrs, mre, is});
    142142            }
    143143
     
    204204    {},
    205205    {Binding{iBuilder->getSizeTy(), "BlockNo"}, Binding{iBuilder->getSizeTy(), "LineNum"}})
    206 , mGrepType(grepType)
    207 , mCodeUnitWidth(codeUnitWidth) {
    208 
    209 }
    210 
    211 }
     206, mGrepType(grepType) {
     207
     208}
     209
     210}
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.h

    r5398 r5411  
    1616class ScanMatchKernel final : public BlockOrientedKernel {
    1717public:
    18     ScanMatchKernel(IDISA::IDISA_Builder * iBuilder, GrepType grepType, unsigned codeUnitWidth);
    19     bool moduleIDisSignature() override {return true;}
     18    ScanMatchKernel(IDISA::IDISA_Builder * const iBuilder, const GrepType grepType, const unsigned codeUnitWidth);
     19    bool moduleIDisSignature() override { return true; }
    2020protected:
    2121    void generateDoBlockMethod() override;
     
    2525private:
    2626    const GrepType      mGrepType;
    27     const unsigned      mCodeUnitWidth;
    2827};
    2928}
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.cpp

    r5408 r5411  
    3535    iBuilder->SetInsertPoint(readBlock);
    3636
    37 
    38 
    39 
    4037    // how many pages are required to have enough data for the segment plus one overflow block?
    4138    const auto PageAlignedSegmentSize = round_up_to_nearest((mSegmentBlocks + 1) * iBuilder->getBitBlockWidth() * (mCodeUnitWidth / 8), getpagesize());
     
    4340    reserveBytes("InputStream", bytesToRead);
    4441    BasicBlock * const readExit = iBuilder->GetInsertBlock();
    45 
    4642    Value * const ptr = getRawOutputPointer("InputStream", iBuilder->getInt32(0), bufferedSize);
    47 
    4843    Value * const bytePtr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
    4944    Value * const bytesRead = iBuilder->CreateReadCall(iBuilder->getInt32(STDIN_FILENO), bytePtr, bytesToRead);
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5408 r5411  
    1515#include <llvm/IR/CFG.h>
    1616#include <kernels/kernel.h>
     17#include <kernels/toolchain.h>
    1718
    1819namespace llvm { class Constant; }
     
    200201}
    201202
     203Value * ExtensibleBuffer::roundUpToPageSize(Value * const value) const {
     204    const auto pageSize = getpagesize();
     205    assert ((pageSize & (pageSize - 1)) == 0);
     206    Constant * const pageMask = ConstantInt::get(value->getType(), pageSize - 1);
     207    return iBuilder->CreateAnd(iBuilder->CreateAdd(value, pageMask), iBuilder->CreateNot(pageMask));
     208}
     209
    202210void ExtensibleBuffer::allocateBuffer() {
    203211    Type * ty = getType();
    204212    Value * instance = iBuilder->CreateCacheAlignedAlloca(ty);
    205213    Value * const capacityPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    206     Constant * initialSize = ConstantExpr::getSizeOf(ty->getStructElementType(1)->getPointerElementType());
    207     initialSize = ConstantExpr::getMul(initialSize, iBuilder->getSize(mBufferBlocks));
    208     initialSize = ConstantExpr::getIntegerCast(initialSize, iBuilder->getSizeTy(), false);
     214
     215    Type * const elementType = ty->getStructElementType(1)->getPointerElementType();
     216    Constant * size = ConstantExpr::getSizeOf(elementType);
     217    size = ConstantExpr::getMul(size, iBuilder->getSize(mBufferBlocks));
     218    size = ConstantExpr::getIntegerCast(size, iBuilder->getSizeTy(), false);
     219    Value * const initialSize = roundUpToPageSize(size);
     220
    209221    iBuilder->CreateStore(initialSize, capacityPtr);
    210     Value * addr = iBuilder->CreateAnonymousMMap(initialSize);
     222    Value * addr = iBuilder->CreateAnonymousMMap(size);
    211223    Value * const addrPtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    212224    addr = iBuilder->CreatePointerCast(addr, addrPtr->getType()->getPointerElementType());
     
    222234
    223235void ExtensibleBuffer::reserveBytes(Value * const self, llvm::Value * const requiredSize) const {
    224 
    225     // TODO: tweak this function to allow AlignedMalloc to begin copying prior to waiting for the
    226     // consumers to finish. MRemap could be used with the "do not move" flag set safely.
    227236
    228237    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     
    241250    kernel::KernelBuilder * const kernel = getProducer();
    242251    auto consumers = kernel->getStreamOutputs();
    243     if (LLVM_UNLIKELY(consumers.empty())) {
     252    if (consumers.empty()) {
    244253        iBuilder->CreateLikelyCondBr(noExpansionNeeded, resume, expand);
    245254    } else { // we cannot risk expanding this buffer until all of the consumers have finished reading the data
    246255
    247         ConstantInt * const zeroSz = iBuilder->getSize(0);
     256        ConstantInt * const size0 = iBuilder->getSize(0);
    248257        Value * const segNo = kernel->acquireLogicalSegmentNo();
    249258        const auto n = consumers.size();
     
    262271            iBuilder->SetInsertPoint(load[i]);
    263272            Value * const outputConsumers = kernel->getConsumerState(consumers[i].name);
     273
    264274            Value * const consumerCount = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, zero}));
    265275            Value * const consumerPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, one}));
    266             Value * const noConsumers = iBuilder->CreateICmpEQ(consumerCount, zeroSz);
     276            Value * const noConsumers = iBuilder->CreateICmpEQ(consumerCount, size0);
    267277            iBuilder->CreateUnlikelyCondBr(noConsumers, load[i + 1], wait[i]);
    268278
    269279            iBuilder->SetInsertPoint(wait[i]);
    270280            PHINode * const consumerPhi = iBuilder->CreatePHI(sizeTy, 2);
    271             consumerPhi->addIncoming(zeroSz, load[i]);
     281            consumerPhi->addIncoming(size0, load[i]);
    272282
    273283            Value * const conSegPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(consumerPtr, consumerPhi));
    274284            Value * const processedSegmentCount = iBuilder->CreateAtomicLoadAcquire(conSegPtr);
    275285            Value * const ready = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
     286            assert (ready->getType() == iBuilder->getInt1Ty());
    276287            Value * const nextConsumerIdx = iBuilder->CreateAdd(consumerPhi, iBuilder->CreateZExt(ready, sizeTy));
    277288            consumerPhi->addIncoming(nextConsumerIdx, wait[i]);
     
    284295    }
    285296    iBuilder->SetInsertPoint(expand);
    286     Value * const reservedSize = iBuilder->CreateShl(requiredSize, 1);
    287 #ifdef __APPLE__
    288     Value * newAddr = iBuilder->CreateAlignedMalloc(reservedSize, iBuilder->getCacheAlignment());
     297    Value * const reservedSize = roundUpToPageSize(iBuilder->CreateShl(requiredSize, 1));
    289298    Value * const baseAddrPtr = iBuilder->CreateGEP(self, {zero, one});
    290     Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
    291     iBuilder->CreateMemCpy(newAddr, baseAddr, currentSize, iBuilder->getCacheAlignment());
    292     iBuilder->CreateAlignedFree(baseAddr);
    293     Value * const remainingSize = iBuilder->CreateSub(reservedSize, currentSize);
    294     iBuilder->CreateMemZero(iBuilder->CreateGEP(newAddr, currentSize), remainingSize, iBuilder->getBitBlockWidth() / 8);
    295     newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
    296 #else
    297     Value * const baseAddrPtr = iBuilder->CreateGEP(self, {zero, one});
     299
    298300    Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
    299301    Value * newAddr = iBuilder->CreateMRemap(baseAddr, currentSize, reservedSize);
    300302    newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
    301 #endif
    302303    iBuilder->CreateStore(newAddr, baseAddrPtr);
    303304    iBuilder->CreateStore(reservedSize, capacityPtr);
     
    425426: StreamSetBuffer(BufferKind::SwizzledCopybackBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks), mFieldWidth(fieldwidth) {
    426427    mUniqueID = "SW" + std::to_string(fieldwidth) + ":" + std::to_string(bufferBlocks);
    427     if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
    428     if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
    429 
     428    if (mOverflowBlocks != 1) {
     429        mUniqueID += "_" + std::to_string(mOverflowBlocks);
     430    }
     431    if (AddressSpace > 0) {
     432        mUniqueID += "@" + std::to_string(AddressSpace);
     433    }
    430434}
    431435
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5408 r5411  
    207207protected:
    208208
     209    llvm::Value * roundUpToPageSize(llvm::Value * const value) const;
     210
    209211    llvm::Value * getBaseAddress(llvm::Value * self) const override;
    210212
  • icGREP/icgrep-devel/icgrep/kernels/toolchain.cpp

    r5409 r5411  
    1616#ifndef NDEBUG
    1717#include <llvm/IR/Verifier.h>
     18#include <boost/container/flat_set.hpp>
    1819#endif
    1920#include <llvm/PassRegistry.h>                     // for PassRegistry
     
    3132#include <IR_Gen/llvm2ptx.h>
    3233#endif
    33  
    34 
    3534
    3635using namespace llvm;
     
    233232
    234233void ParabixDriver::generatePipelineIR() {
     234    #ifndef NDEBUG
     235    if (LLVM_UNLIKELY(mKernelList.empty())) {
     236        report_fatal_error("Pipeline must contain at least one kernel");
     237    } else {
     238        boost::container::flat_set<kernel::KernelBuilder *> K(mKernelList.begin(), mKernelList.end());
     239        if (LLVM_UNLIKELY(K.size() != mKernelList.size())) {
     240            report_fatal_error("Kernel definitions can only occur once in the pipeline");
     241        }
     242    }
     243    #endif
    235244    // note: instantiation of all kernels must occur prior to initialization
    236245    for (const auto & k : mKernelList) {
     
    250259        codegen::ThreadNum = 1;
    251260        generatePipelineLoop(iBuilder, mKernelList);
     261    }
     262    for (const auto & k : mKernelList) {
     263        k->terminateInstance();
    252264    }
    253265}
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5409 r5411  
    176176    pxDriver.generatePipelineIR();
    177177   
    178     Value * lineCount = wck.createGetAccumulatorCall(wck.getInstance(), "lineCount");
    179     Value * wordCount = wck.createGetAccumulatorCall(wck.getInstance(), "wordCount");
    180     Value * charCount = wck.createGetAccumulatorCall(wck.getInstance(), "charCount");
     178    Value * lineCount = wck.createGetAccumulatorCall("lineCount");
     179    Value * wordCount = wck.createGetAccumulatorCall("wordCount");
     180    Value * charCount = wck.createGetAccumulatorCall("charCount");
    181181
    182182    iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
Note: See TracChangeset for help on using the changeset viewer.