Changeset 5347


Ignore:
Timestamp:
Feb 25, 2017, 12:50:29 PM (2 years ago)
Author:
nmedfort
Message:

Added enable asserts (-ea) command line flag + restructured BlockOrientedKernels to allow for inlined code.

Location:
icGREP/icgrep-devel/icgrep
Files:
23 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5340 r5347  
    1111#include <llvm/IR/TypeBuilder.h>
    1212#include <fcntl.h>
     13#include <toolchain.h>
    1314
    1415using namespace llvm;
     
    2425    return CreateCall(openFn, {filename, oflag, mode});
    2526}
    26 
    27 
    2827
    2928// ssize_t write(int fildes, const void *buf, size_t nbyte);
     
    142141    ci->setTailCall();
    143142    ci->setCallingConv(malloc->getCallingConv());
    144     return CreateBitOrPointerCast(ci, type->getPointerTo());
     143    Value * ptr = CreateBitOrPointerCast(ci, type->getPointerTo());
     144    CreateAssert(ptr, "FATAL ERROR: out of memory");
     145    return ptr;
    145146}
    146147
     
    363364}
    364365
    365 void CBuilder::CreateAssert(llvm::Value * const toCheck, llvm::StringRef failureMessage) {
    366     #ifndef NDEBUG
    367     Module * m = getModule();
    368     Function * assertion = m->getFunction("__assert");
    369     if (LLVM_UNLIKELY(assertion == nullptr)) {
    370         auto ip = saveIP();
    371         assertion = cast<Function>(m->getOrInsertFunction("__assert", getVoidTy(), getInt1Ty(), getInt8PtrTy(), getSizeTy(), nullptr));
    372         BasicBlock * entry = BasicBlock::Create(getContext(), "", assertion);
    373         BasicBlock * failure = BasicBlock::Create(getContext(), "", assertion);
    374         BasicBlock * success = BasicBlock::Create(getContext(), "", assertion);
    375         auto arg = assertion->arg_begin();
    376         arg->setName("e");
    377         Value * e = &*arg++;
    378         arg->setName("msg");
    379         Value * msg = &*arg++;
    380         arg->setName("sz");
    381         Value * sz = &*arg;
    382         SetInsertPoint(entry);
    383         CreateCondBr(e, failure, success);
    384         SetInsertPoint(failure);
    385         CreateWriteCall(getInt32(2), msg, sz);
    386         Function * exit = m->getFunction("exit");
    387         if (LLVM_UNLIKELY(exit == nullptr)) {
    388             exit = cast<Function>(m->getOrInsertFunction("exit", getVoidTy(), getInt32Ty(), nullptr));
    389             exit->setDoesNotReturn();
    390             exit->setDoesNotThrow();
    391         }
    392         CreateCall(exit, getInt32(-1));
    393         CreateBr(success); // we're forced to have this to satisfy the LLVM verifier. this is not actually executed.
    394         SetInsertPoint(success);
    395         CreateRetVoid();
    396         restoreIP(ip);
    397     }
    398     CreateCall(assertion, {CreateICmpEQ(toCheck, Constant::getNullValue(toCheck->getType())), CreateGlobalStringPtr(failureMessage), getSize(failureMessage.size())});
    399     #endif
     366void CBuilder::CreateAssert(llvm::Value * const assertion, llvm::StringRef failureMessage) {
     367    if (codegen::EnableAsserts) {
     368        Module * const m = getModule();
     369        Function * function = m->getFunction("__assert");
     370        if (LLVM_UNLIKELY(function == nullptr)) {
     371            auto ip = saveIP();
     372            function = cast<Function>(m->getOrInsertFunction("__assert", getVoidTy(), getInt1Ty(), getInt8PtrTy(), getSizeTy(), nullptr));
     373            function->setDoesNotThrow();
     374            function->setDoesNotAlias(2);
     375            BasicBlock * const entry = BasicBlock::Create(getContext(), "", function);
     376            BasicBlock * const failure = BasicBlock::Create(getContext(), "", function);
     377            BasicBlock * const success = BasicBlock::Create(getContext(), "", function);
     378            auto arg = function->arg_begin();
     379            arg->setName("assertion");
     380            Value * e = &*arg++;
     381            arg->setName("msg");
     382            Value * msg = &*arg++;
     383            arg->setName("sz");
     384            Value * sz = &*arg;
     385            SetInsertPoint(entry);
     386            CreateCondBr(e, failure, success);
     387            SetInsertPoint(failure);
     388            Value * len = CreateAdd(sz, getSize(21));
     389            ConstantInt * _11 = getSize(11);
     390            Value * bytes = CreateMalloc(getInt8Ty(), len);
     391            CreateMemCpy(bytes, CreateGlobalStringPtr("Assertion `"), _11, 1);
     392            CreateMemCpy(CreateGEP(bytes, _11), msg, sz, 1);
     393            CreateMemCpy(CreateGEP(bytes, CreateAdd(sz, _11)), CreateGlobalStringPtr("' failed.\n"), getSize(10), 1);
     394            CreateWriteCall(getInt32(2), bytes, len);
     395            CreateExit(-1);
     396            CreateBr(success); // necessary to satisfy the LLVM verifier. this is not actually executed.
     397            SetInsertPoint(success);
     398            CreateRetVoid();
     399            restoreIP(ip);
     400        }
     401        CreateCall(function, {CreateICmpEQ(assertion, Constant::getNullValue(assertion->getType())), CreateGlobalStringPtr(failureMessage), getSize(failureMessage.size())});
     402    }
     403}
     404
     405void CBuilder::CreateExit(const int exitCode) {
     406    Module * const m = getModule();
     407    Function * exit = m->getFunction("exit");
     408    if (LLVM_UNLIKELY(exit == nullptr)) {
     409        exit = cast<Function>(m->getOrInsertFunction("exit", getVoidTy(), getInt32Ty(), nullptr));
     410        exit->setDoesNotReturn();
     411        exit->setDoesNotThrow();
     412    }
     413    CreateCall(exit, getInt32(exitCode));
    400414}
    401415
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5340 r5347  
    109109    virtual llvm::StoreInst *  CreateAtomicStoreRelease(llvm::Value * val, llvm::Value * ptr);
    110110   
    111     // Warning! this class must be compiled in debug mode or the check will be ignored.
    112     void CreateAssert(llvm::Value * toCheck, llvm::StringRef failureMessage);
     111    void CreateAssert(llvm::Value * assertion, llvm::StringRef failureMessage);
     112
     113    void CreateExit(const int exitCode);
    113114
    114115protected:
  • icGREP/icgrep-devel/icgrep/editd/editd_cpu_kernel.h

    r5297 r5347  
    1919    editdCPUKernel(IDISA::IDISA_Builder * b, unsigned dist, unsigned pattLen);
    2020   
    21    
     21
    2222private:
    2323    void generateDoBlockMethod() override;
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5337 r5347  
    3030IR_Gen/llvm2ptx.h
    3131IR_Gen/tracegen.h
     32kernels/alignedprint.cpp
     33kernels/alignedprint.h
    3234kernels/cc_kernel.cpp
    3335kernels/cc_kernel.h
     36kernels/cc_scan_kernel.cpp
     37kernels/cc_scan_kernel.h
    3438kernels/deletion.cpp
    3539kernels/deletion.h
     
    4044kernels/kernel.cpp
    4145kernels/kernel.h
     46kernels/match_count.cpp
     47kernels/match_count.h
    4248kernels/mmap_kernel.cpp
    4349kernels/mmap_kernel.h
     
    5662kernels/stdout_kernel.cpp
    5763kernels/stdout_kernel.h
     64kernels/streams_merge.cpp
     65kernels/streams_merge.h
    5866kernels/streamset.cpp
    5967kernels/streamset.h
     68kernels/swizzle.cpp
     69kernels/swizzle.h
     70kernels/unicode_linebreak_kernel.cpp
     71kernels/unicode_linebreak_kernel.h
    6072pablo/analysis/pabloverifier.cpp
    6173pablo/analysis/pabloverifier.hpp
     
    212224object_cache.cpp
    213225object_cache.h
     226preprocess.cpp
    214227toolchain.cpp
    215228toolchain.h
     
    221234wc.cpp
    222235CMakeLists.txt
    223 kernels/alignedprint.h
    224 kernels/alignedprint.cpp
    225 kernels/swizzle.cpp
    226 kernels/swizzle.h
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5317 r5347  
    105105}
    106106
    107 inline Value * apply_PEXT_deletion(IDISA::IDISA_Builder * iBuilder, const std::vector<Value *> & masks, Value * strm) {
     107inline Value * apply_PEXT_deletion(IDISA::IDISA_Builder * iBuilder, const std::vector<Value *> & masks, Value * strm) {   
    108108    Value * PEXT_func = nullptr;
    109     if (PEXT_width == 64) PEXT_func = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_bmi_pext_64);
    110     else if (PEXT_width == 32) PEXT_func = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_bmi_pext_32);
     109    if (PEXT_width == 64) {
     110        PEXT_func = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_bmi_pext_64);
     111    } else if (PEXT_width == 32) {
     112        PEXT_func = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_bmi_pext_32);
     113    }
    111114    Value * v = iBuilder->fwCast(PEXT_width, strm);
    112115    Value * output = Constant::getNullValue(v->getType());
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5328 r5347  
    9595    }
    9696
    97     // Add any additional kernel declarations
    98     addAdditionalKernelDeclarations(client, selfType);
     97//    // Add any additional kernel declarations
     98//    addAdditionalKernelDeclarations(client, selfType);
    9999
    100100    // Create the accumulator get function prototypes
     
    107107        auto args = accumFn->arg_begin();
    108108        args->setName("self");
    109         assert ((++args) == accumFn->arg_end());
    110109    }
    111110
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5346 r5347  
    155155    }
    156156   
    157     virtual void addAdditionalKernelDeclarations(llvm::Module * module, llvm::PointerType * selfType) {}
     157//    virtual void addAdditionalKernelDeclarations(llvm::Module * module, llvm::PointerType * selfType) {}
    158158
    159159protected:
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5340 r5347  
    1414#include <llvm/IR/LegacyPassManager.h>
    1515#include <llvm/Transforms/Scalar.h>
    16 #ifndef NDEBUG
    1716#include <llvm/IR/Verifier.h>
    18 #endif
    1917
    2018static const auto DO_BLOCK_SUFFIX = "_DoBlock";
     
    166164    addKernelDeclarations(iBuilder->getModule());
    167165    callGenerateInitMethod();
    168     generateInternalMethods();
    169166    callGenerateDoSegmentMethod();
    170167    // Implement the accumulator get functions
     
    181178
    182179void KernelBuilder::callGenerateDoSegmentMethod() {
    183     mCurrentFunction = getDoSegmentFunction();
     180    mCurrentMethod = getDoSegmentFunction();
    184181    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
    185     auto args = mCurrentFunction->arg_begin();
     182    auto args = mCurrentMethod->arg_begin();
    186183    mSelf = &*(args++);
    187184    Value * doFinal = &*(args++);
     
    195192
    196193void KernelBuilder::callGenerateInitMethod() {
    197     mCurrentFunction = getInitFunction();
     194    mCurrentMethod = getInitFunction();
    198195    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
    199     Function::arg_iterator args = mCurrentFunction->arg_begin();
     196    Function::arg_iterator args = mCurrentMethod->arg_begin();
    200197    mSelf = &*(args++);
    201198    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), mSelf);
     
    265262
    266263void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
    267     //iBuilder->CallPrintInt(getName() + " " + name + " processed", value);
    268264    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
    269265}
    270266
    271267void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
    272     //iBuilder->CallPrintInt(getName() + " " + name +  " produced", value);
    273268    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
    274269}
     
    397392
    398393BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
    399     return BasicBlock::Create(iBuilder->getContext(), name, mCurrentFunction);
     394    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
    400395}
    401396
     
    440435}
    441436
    442 //  The default finalBlock method simply dispatches to the doBlock routine.
    443 void BlockOrientedKernel::generateFinalBlockMethod(Value * remainingBytes) {
    444 //    std::vector<Value *> args = {self};
    445 //    for (Argument & arg : function->getArgumentList()){
    446 //        args.push_back(&arg);
    447 //    }
    448     CreateDoBlockMethodCall();
    449 }
    450 
    451437//  The default doSegment method dispatches to the doBlock routine for
    452438//  each block of the given number of blocksToDo, and then updates counts.
     439
    453440void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
     441
     442    // Use the pass manager to optimize the function.
     443    FunctionPassManager fpm(iBuilder->getModule());
     444    #ifndef NDEBUG
     445    fpm.add(createVerifierPass());
     446    #endif
     447    fpm.add(createReassociatePass());             //Reassociate expressions.
     448    fpm.add(createGVNPass());                     //Eliminate common subexpressions.
     449    fpm.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
     450    fpm.doInitialization();
    454451
    455452    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
     
    457454    BasicBlock * const strideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
    458455    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
    459     BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
    460     BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
    461456
    462457    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
     
    481476    iBuilder->SetInsertPoint(strideLoopBody);
    482477
    483     CreateDoBlockMethodCall();
     478    /// GENERATE DO BLOCK METHOD
     479
     480    generateDoBlockMethod(fpm);
     481
     482    /// UPDATE PROCESSED COUNTS
    484483
    485484    processed = getProcessedItemCount(mStreamSetInputs[0].name);
    486485    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
    487486    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
    488    
    489     stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), strideLoopBody);
     487
     488    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
    490489    iBuilder->CreateBr(strideLoopCond);
    491490
     
    493492
    494493    // Now conditionally perform the final block processing depending on the doFinal parameter.
     494    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
     495    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
    495496    iBuilder->CreateCondBr(doFinal, doFinalBlock, segmentDone);
    496497    iBuilder->SetInsertPoint(doFinalBlock);
    497498
    498499    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
    499 
    500     CreateDoFinalBlockMethodCall(remainingItems);
    501    
     500    generateFinalBlockMethod(remainingItems, fpm);
     501
    502502    itemsDone = producerPos[0];
    503     setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);   
    504    
     503    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
    505504    setTerminationSignal();
    506505    iBuilder->CreateBr(segmentDone);
    507506
    508507    iBuilder->SetInsertPoint(segmentDone);
    509 
    510 }
    511 
    512 void BlockOrientedKernel::generateInternalMethods() {
    513 
    514     callGenerateDoBlockMethod();
    515 
    516     callGenerateDoFinalBlockMethod();
    517 }
    518 
    519 void BlockOrientedKernel::callGenerateDoBlockMethod() {
    520     mCurrentFunction = getDoBlockFunction();
    521     auto args = mCurrentFunction->arg_begin();
    522     mSelf = &(*args);
    523     iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     508}
     509
     510void BlockOrientedKernel::generateDoBlockMethod(FunctionPassManager & fpm) {
     511
     512    Value * const self = mSelf;
     513    Function * const cp = mCurrentMethod;
     514    auto ip = iBuilder->saveIP();
     515
     516    /// Check if the do block method is called and create the function if necessary   
     517    if (isCalled()) {
     518        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
     519        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
     520        mCurrentMethod->setCallingConv(CallingConv::C);
     521        mCurrentMethod->setDoesNotThrow();
     522        mCurrentMethod->setDoesNotCapture(1);
     523        auto args = mCurrentMethod->arg_begin();
     524        mCurrentMethod = mCurrentMethod;
     525        mSelf = &*args;
     526        mSelf->setName("self");
     527        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     528    }
     529
     530    writeDoBlockMethod();
     531
     532    /// Call the do block method if necessary then restore the current function state to the do segment method
     533
     534    if (isCalled()) {
     535        iBuilder->CreateRetVoid();
     536        mDoBlockMethod = mCurrentMethod;
     537        fpm.run(*mCurrentMethod);
     538        iBuilder->restoreIP(ip);
     539        iBuilder->CreateCall(mCurrentMethod, self);
     540
     541        mSelf = self;
     542        mCurrentMethod = cp;
     543    }
     544
     545}
     546
     547void BlockOrientedKernel::writeDoBlockMethod() {
     548
    524549    std::vector<Value *> priorProduced;
    525550    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     
    528553        }
    529554    }
    530     generateDoBlockMethod(); // must be implemented by the KernelBuilder subtype
     555
     556    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
     557
    531558    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    532559        unsigned priorIdx = 0;
     
    545572            priorIdx++;
    546573        }
    547     }   
    548     iBuilder->CreateRetVoid();
    549     #ifndef NDEBUG
    550     std::string tmp;
    551     raw_string_ostream out(tmp);
    552     if (verifyFunction(*mCurrentFunction, &out)) {
    553         mCurrentFunction->dump();
    554         report_fatal_error(getName() + ": " + out.str());
    555     }
    556     #endif
    557     // Use the pass manager to optimize the function.
    558     FunctionPassManager fpm(iBuilder->getModule());
    559     fpm.add(createReassociatePass());             //Reassociate expressions.
    560     fpm.add(createGVNPass());                     //Eliminate common subexpressions.
    561     fpm.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    562     fpm.doInitialization();
    563     fpm.run(*mCurrentFunction);
    564 }
    565 
    566 
    567 void BlockOrientedKernel::callGenerateDoFinalBlockMethod() {
    568     mCurrentFunction = getDoFinalBlockFunction();
    569     auto args = mCurrentFunction->arg_begin();
    570     mSelf = &(*args++);
    571     Value * const remainingBytes = &(*args);
    572     iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
    573     generateFinalBlockMethod(remainingBytes); // possibly overridden by the KernelBuilder subtype
    574     iBuilder->CreateRetVoid();
    575 }
    576 
    577 Function * BlockOrientedKernel::getDoBlockFunction() const {
    578     const auto name = getName() + DO_BLOCK_SUFFIX;
    579     Function * const f = iBuilder->getModule()->getFunction(name);
    580     if (LLVM_UNLIKELY(f == nullptr)) {
    581         report_fatal_error("Cannot find " + name);
    582     }
    583     return f;
    584 }
    585 
    586 CallInst * BlockOrientedKernel::CreateDoBlockMethodCall() const {
    587     return iBuilder->CreateCall(getDoBlockFunction(), mSelf);
    588 }
    589 
    590 Function * BlockOrientedKernel::getDoFinalBlockFunction() const {
    591     const auto name = getName() + FINAL_BLOCK_SUFFIX;
    592     Function * const f = iBuilder->getModule()->getFunction(name);
    593     if (LLVM_UNLIKELY(f == nullptr)) {
    594         report_fatal_error("Cannot find " + name);
    595     }
    596     return f;
    597 }
    598 
    599 CallInst * BlockOrientedKernel::CreateDoFinalBlockMethodCall(Value * remainingItems) const {
    600     return iBuilder->CreateCall(getDoFinalBlockFunction(), {mSelf, remainingItems});
    601 }
    602 
    603 void BlockOrientedKernel::addAdditionalKernelDeclarations(Module * m, PointerType * selfType) {
    604     // Create the doBlock and finalBlock function prototypes
    605     FunctionType * const doBlockType = FunctionType::get(iBuilder->getVoidTy(), {selfType}, false);
    606     Function * const doBlock = Function::Create(doBlockType, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, m);
    607     doBlock->setCallingConv(CallingConv::C);
    608     doBlock->setDoesNotThrow();
    609     doBlock->setDoesNotCapture(1);
    610     auto args = doBlock->arg_begin();
    611     args->setName("self");
    612 
    613     FunctionType * const finalBlockType = FunctionType::get(iBuilder->getVoidTy(), {selfType, iBuilder->getSizeTy()}, false);
    614     Function * const finalBlock = Function::Create(finalBlockType, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, m);
    615     finalBlock->setCallingConv(CallingConv::C);
    616     finalBlock->setDoesNotThrow();
    617     finalBlock->setDoesNotCapture(1);
    618     args = finalBlock->arg_begin();
    619     args->setName("self");
    620     (++args)->setName("remainingBytes");
    621 }
     574    }
     575
     576}
     577
     578void BlockOrientedKernel::generateFinalBlockMethod(Value * remainingItems, FunctionPassManager & fpm) {
     579
     580    Value * const self = mSelf;
     581    Function * const cp = mCurrentMethod;
     582    Value * const remainingItemCount = remainingItems;
     583    auto ip = iBuilder->saveIP();
     584
     585    if (isCalled()) {
     586        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
     587        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
     588        mCurrentMethod->setCallingConv(CallingConv::C);
     589        mCurrentMethod->setDoesNotThrow();
     590        mCurrentMethod->setDoesNotCapture(1);
     591        auto args = mCurrentMethod->arg_begin();
     592        mSelf = &*args;
     593        mSelf->setName("self");
     594        remainingItems = &*(++args);
     595        remainingItems->setName("remainingItems");
     596        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     597    }
     598
     599    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernelBuilder subtype
     600
     601    if (isCalled()) {
     602        iBuilder->CreateRetVoid();       
     603        fpm.run(*mCurrentMethod);
     604        iBuilder->restoreIP(ip);
     605        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
     606        mCurrentMethod = cp;
     607        mSelf = self;
     608    }
     609
     610}
     611
     612//  The default finalBlock method simply dispatches to the doBlock routine.
     613void BlockOrientedKernel::generateFinalBlockMethod(Value * remainingItems) {
     614    CreateDoBlockMethodCall();
     615}
     616
     617void BlockOrientedKernel::CreateDoBlockMethodCall() {
     618    if (isCalled()) {
     619        iBuilder->CreateCall(mDoBlockMethod, mSelf);
     620    } else {
     621        // TODO: can we clone the DoBlock method instead of regenerating it?
     622        writeDoBlockMethod();
     623    }
     624}
     625
     626// CONSTRUCTOR
     627
     628BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
     629                                                           std::string && kernelName,
     630                                                           std::vector<Binding> && stream_inputs,
     631                                                           std::vector<Binding> && stream_outputs,
     632                                                           std::vector<Binding> && scalar_parameters,
     633                                                           std::vector<Binding> && scalar_outputs,
     634                                                           std::vector<Binding> && internal_scalars)
     635: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
     636, mDoBlockMethod(nullptr)
     637, mInlined(false) {
     638
     639}
     640
    622641
    623642// CONSTRUCTOR
     
    638657
    639658// CONSTRUCTOR
    640 BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
    641                                          std::string && kernelName,
    642                                          std::vector<Binding> && stream_inputs,
    643                                          std::vector<Binding> && stream_outputs,
    644                                          std::vector<Binding> && scalar_parameters,
    645                                          std::vector<Binding> && scalar_outputs,
    646                                          std::vector<Binding> && internal_scalars)
    647 : KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
    648 
    649 }
    650 
    651 
    652 
    653 
    654 // CONSTRUCTOR
    655659SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
    656660                                             std::string && kernelName,
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5329 r5347  
    1515namespace llvm { class ConstantInt; }
    1616namespace llvm { class Function; }
     17namespace llvm { namespace legacy { class FunctionPassManager; } }
    1718namespace llvm { class IntegerType; }
    1819namespace llvm { class LoadInst; }
     
    137138    virtual void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) = 0;
    138139
    139     virtual void generateInternalMethods() { }
     140//    virtual void generateInternalMethods() { }
    140141
    141142    // Add an additional scalar field to the KernelState struct.
     
    265266
    266267    llvm::Value *                                   mSelf;
    267     llvm::Function *                                mCurrentFunction;
     268    llvm::Function *                                mCurrentMethod;
    268269
    269270    std::vector<llvm::Type *>                       mKernelFields;
     
    293294
    294295class BlockOrientedKernel : public KernelBuilder {
    295 public:
    296 
    297     llvm::CallInst * CreateDoBlockMethodCall() const;
    298 
    299     llvm::CallInst * CreateDoFinalBlockMethodCall(llvm::Value * remainingItems) const;
    300 
    301296protected:
    302297
    303     virtual void addAdditionalKernelDeclarations(llvm::Module * module, llvm::PointerType * selfType);
     298    void CreateDoBlockMethodCall();
    304299
    305300    // Each kernel builder subtype must provide its own logic for generating
     
    314309    // not be overridden.
    315310
    316     virtual void generateFinalBlockMethod(llvm::Value * remainingBytes);
    317 
    318     virtual void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) final;
    319 
    320     void generateInternalMethods() override final;
     311    virtual void generateFinalBlockMethod(llvm::Value * remainingItems);
     312
     313    void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override final;
    321314
    322315    BlockOrientedKernel(IDISA::IDISA_Builder * builder,
     
    330323    virtual ~BlockOrientedKernel() { }
    331324
    332     llvm::Value * loadBlock(const std::string & inputName, llvm::Value * const streamIndex) const;
    333 
    334     llvm::Value * loadPack(const std::string & inputName, llvm::Value * const streamIndex, llvm::Value * const packIndex) const;
    335 
    336     llvm::Function * getDoBlockFunction() const;
    337 
    338     llvm::Function * getDoFinalBlockFunction() const;
     325    bool isCalled() const {
     326        return !mInlined;
     327    }
     328
     329    bool isInlined() const {
     330        return mInlined;
     331    }
     332
     333    void setInlined(const bool value = true) {
     334        mInlined = value;
     335    }
    339336
    340337private:
    341338
    342     void callGenerateDoBlockMethod();
    343 
    344     void callGenerateDoFinalBlockMethod();
    345 
     339    void generateDoBlockMethod(llvm::legacy::FunctionPassManager & fpm);
     340
     341    void writeDoBlockMethod();
     342
     343    void generateFinalBlockMethod(llvm::Value *remainingItems, llvm::legacy::FunctionPassManager & fpm);
     344
     345private:
     346
     347    llvm::Function * mDoBlockMethod;
     348    bool             mInlined;
    346349};
    347350
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5330 r5347  
    5353}
    5454
    55 P2SKernel::P2SKernel(IDISA::IDISA_Builder * iBuilder)
    56 : BlockOrientedKernel(iBuilder, "p2s",
    57               {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}},
    58               {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"}},
    59               {}, {}, {}) {
    60 
    61 }
    62 
    63 
    6455void P2SKernelWithCompressedOutput::generateDoBlockMethod() {
    6556    IntegerType * i32 = iBuilder->getInt32Ty();
     
    8980    setProducedItemCount("byteStream", unitsGenerated);
    9081}
    91    
    92 P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder)
    93 : BlockOrientedKernel(iBuilder, "p2s_compress",
    94               {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}, Binding{iBuilder->getStreamSetTy(1, 1), "deletionCounts"}},
    95                       {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", MaxRatio(1)}},
    96               {}, {}, {}) {
    97     setDoBlockUpdatesProducedItemCountsAttribute(true);
    98 }
    99    
    100    
    10182
    10283void P2S16Kernel::generateDoBlockMethod() {
     
    120101    }
    121102}
    122    
    123 
    124 P2S16Kernel::P2S16Kernel(IDISA::IDISA_Builder * iBuilder)
    125 : BlockOrientedKernel(iBuilder, "p2s_16",
    126               {Binding{iBuilder->getStreamSetTy(16, 1), "basisBits"}},
    127               {Binding{iBuilder->getStreamSetTy(1, 16), "i16Stream"}},
    128               {}, {}, {}) {
    129 
    130 }
    131 
    132    
     103       
    133104void P2S16KernelWithCompressedOutput::generateDoBlockMethod() {
    134105    IntegerType * i32Ty = iBuilder->getInt32Ty();
     
    174145    setProducedItemCount("i16Stream", i16UnitsFinal);
    175146}
    176    
     147
     148P2SKernel::P2SKernel(IDISA::IDISA_Builder * iBuilder)
     149: BlockOrientedKernel(iBuilder, "p2s",
     150              {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}},
     151              {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"}},
     152              {}, {}, {}) {
     153}
     154
     155P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder)
     156: BlockOrientedKernel(iBuilder, "p2s_compress",
     157              {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}, Binding{iBuilder->getStreamSetTy(1, 1), "deletionCounts"}},
     158                      {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", MaxRatio(1)}},
     159              {}, {}, {}) {
     160    setDoBlockUpdatesProducedItemCountsAttribute(true);
     161}
     162
     163P2S16Kernel::P2S16Kernel(IDISA::IDISA_Builder * iBuilder)
     164: BlockOrientedKernel(iBuilder, "p2s_16",
     165              {Binding{iBuilder->getStreamSetTy(16, 1), "basisBits"}},
     166              {Binding{iBuilder->getStreamSetTy(1, 16), "i16Stream"}},
     167              {}, {}, {}) {
     168}
     169
     170
    177171P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(IDISA::IDISA_Builder * b)
    178172: BlockOrientedKernel(b, "p2s_16_compress",
     
    183177              {}) {
    184178    setDoBlockUpdatesProducedItemCountsAttribute(true);
     179
    185180}
    186181   
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5317 r5347  
    166166    setNoTerminateAttribute(true);
    167167    setDoBlockUpdatesProducedItemCountsAttribute(false);
    168 
     168    setInlined(true);
    169169}
    170170
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5333 r5347  
    254254    {Binding{iBuilder->getSizeTy(), "BlockNo"}, Binding{iBuilder->getSizeTy(), "LineStart"}, Binding{iBuilder->getSizeTy(), "LineNum"}})
    255255, mGrepType(grepType) {
    256 
    257 }
    258 
    259 }
     256    setInlined(true);
     257}
     258
     259}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5340 r5347  
    200200}
    201201
    202 bool dominates(const Instruction * const x, const Instruction * const y) {
    203     // Are they in the same basic block?
    204     if (x->getParent() == y->getParent()) {
    205         if (y->getNextNode() == nullptr) {
    206             return true;
    207         }
    208         for (const Instruction * z = x; z; z = z->getNextNode()) {
    209             if (z == y) {
    210                 return true;
    211             }
    212         }
    213         return false;
    214     } else {
    215         const BasicBlock * yp = y->getParent();
    216         for (auto pi = pred_begin(yp), pi_end = pred_end(yp); pi != pi_end; ++pi) {
    217             if (!dominates(x, (*pi)->getTerminator())) {
    218                 return false;
    219             }
    220         }
    221         return true;
    222     }
    223 }
    224 
    225202inline bool ExpandableBuffer::isGuaranteedCapacity(const llvm::Value * const index) const {
    226203    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
  • icGREP/icgrep-devel/icgrep/kernels/unicode_linebreak_kernel.cpp

    r5336 r5347  
    2424, unsigned basisBitsCount)
    2525: PabloKernel(iBuilder, unicodelinebreak +"_kernel", {Binding{iBuilder->getStreamSetTy(basisBitsCount), "basis"}}) {
     26
    2627    CC_Compiler ccc(this, getInput(0));
    2728    auto & builder = ccc.getBuilder();
     
    6970    builder.createAssign(r, UnicodeLineBreak);
    7071
     72    setInlined(true);
    7173}
  • icGREP/icgrep-devel/icgrep/pablo/carry_data.h

    r5267 r5347  
    5555    }
    5656
     57    bool hasVariableLength() const {
     58        return variableLength;
     59    }
     60
    5761   
    5862protected:
    5963
    60     SummaryType     summaryType;
    61     bool            variableLength;
     64    SummaryType             summaryType;
     65    bool                    variableLength;
    6266
    6367};
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5340 r5347  
    8585
    8686/** ------------------------------------------------------------------------------------------------------------- *
    87  * @brief allocateCarryData
    88  ** ------------------------------------------------------------------------------------------------------------- */
    89 void CarryManager::allocateCarryData(PabloKernel * const kernel) {
    90 //    mKernel = kernel;
    91 //    mCarryScopes = 0;
    92 //    mCarryScopeIndex.push_back(0);
    93 //    mCurrentFrame = kernel->getScalarFieldPtr("carries");
    94 //    allocateCarryData (mCurrentFrame->getType());
    95 }
    96 
    97 ///** ------------------------------------------------------------------------------------------------------------- *
    98 // * @brief allocateCarryData
    99 // ** ------------------------------------------------------------------------------------------------------------- */
    100 //void CarryManager::allocateCarryData(Type * const type) {
    101 //    assert (type->isStructTy());
    102 
    103 //    const auto scopeIndex = mCarryScopes++;
    104 
    105 //    const unsigned vl = mCarryMetadata[scopeIndex].variableLength ? 4 : 0;
    106 
    107 //    if (LLVM_UNLIKELY(vl != 0)) {
    108 
    109 
    110 //        ConstantInt * const capacity = iBuilder->getSize(vl);
    111 //        Value * capacityPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    112 //        iBuilder->CreateStore(capacity, capacityPtr, false);
    113 //        Value * arrayPtr = iBuilder->CreateGEP(mCurrentFrame, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    114 //        Value * carryStateType = arrayPtr->getType()->getPointerElementType();
    115 //        Value * array = iBuilder->CreateAlignedMalloc(carryStateType, capacity, iBuilder->getCacheAlignment());
    116 //        Constant * typeWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(carryStateType), iBuilder->getSizeTy(), false);
    117 //        iBuilder->CreateMemZero(array, iBuilder->CreateMul(capacity, typeWidth), iBuilder->getCacheAlignment());
    118 //        iBuilder->CreateStore(array, arrayPtr, false);
    119 
    120 
    121 
    122 
    123 //    } else {
    124 
    125 
    126 //        for (unsigned i = 0; i < type->getStructNumElements(); ++i) {
    127 
    128 
    129 
    130 //        }
    131 
    132 
    133 
    134 //    }
    135 
    136 
    137 
    138 
    139 
    140 
    141 //}
    142 
    143 
    144 /** ------------------------------------------------------------------------------------------------------------- *
    14587 * @brief initializeCodeGen
    14688 ** ------------------------------------------------------------------------------------------------------------- */
     
    207149    }
    208150
    209     if (LLVM_UNLIKELY(mCarryInfo->variableLength)) {
     151    if (LLVM_UNLIKELY(mCarryInfo->hasVariableLength())) {
    210152        // Check whether we need to resize the carry state
    211153        PHINode * index = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     
    230172
    231173        Constant * carryStateWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(carryStateType), capacity->getType(), false);
    232 
    233         Value * newCapacity = iBuilder->CreateMul(iBuilder->CreateAdd(index, iBuilder->getSize(1)), iBuilder->getSize(2));
     174        Value * newCapacity = iBuilder->CreateSelect(iBuilder->CreateICmpNE(index, ConstantInt::getNullValue(index->getType())), iBuilder->CreateMul(index, iBuilder->getSize(2)), iBuilder->getSize(4));
    234175        Value * newArray = iBuilder->CreateAlignedMalloc(carryStateType, newCapacity, iBuilder->getCacheAlignment());
    235176
     
    241182        iBuilder->CreateCondBr(isNullCarryState, zeroBlock, cleanUpBlock);
    242183        iBuilder->SetInsertPoint(cleanUpBlock);
     184
     185
    243186
    244187        iBuilder->CreateMemCpy(newArray, array, iBuilder->CreateMul(capacity, carryStateWidth), iBuilder->getCacheAlignment());
     
    275218        mCarrySummary.pop_back();
    276219    }
    277     if (LLVM_UNLIKELY(mCarryInfo->variableLength)) {
     220    if (LLVM_UNLIKELY(mCarryInfo->hasVariableLength())) {
    278221        assert (!mLoopIndicies.empty());
    279222        PHINode * index = mLoopIndicies.back();
     
    592535 ** ------------------------------------------------------------------------------------------------------------- */
    593536bool CarryManager::inCollapsingCarryMode() const {
    594     return (mCurrentScope->getBranch() && isa<While>(mCurrentScope->getBranch()) && !mCarryInfo->variableLength);
     537    return (mCurrentScope->getBranch() && isa<While>(mCurrentScope->getBranch()) && !mCarryInfo->hasVariableLength());
    595538}
    596539
     
    641584StructType * CarryManager::analyse(PabloBlock * const scope, const unsigned ifDepth, const unsigned loopDepth) {
    642585
     586    assert (scope != mKernel->getEntryBlock() || mCarryScopes == 0);
     587    assert (mCarryScopes < mCarryMetadata.size());
     588    assert (mCarryPackType);
     589
     590    CarryData & cd = mCarryMetadata[mCarryScopes++];
     591
    643592    std::vector<Type *> state;
    644593
    645     assert (mCarryPackType);
    646 
    647594    Type * const carryPackType = (loopDepth == 0) ? mCarryPackType : ArrayType::get(mCarryPackType, 2);
    648 
    649     const auto scopeIndex = mCarryScopes++;
    650 
    651     assert (scopeIndex < mCarryMetadata.size());
    652595
    653596    bool hasLongAdvances = false;
     
    678621    }
    679622
    680     CarryData & cd = mCarryMetadata[scopeIndex];
    681 
    682623    StructType * carryState = nullptr;
    683624
     
    705646        carryState = StructType::get(iBuilder->getContext(), state);
    706647        // If we are in a loop and cannot use collapsing carry mode, convert the struct into a capacity and pointer pair.
    707         if (LLVM_UNLIKELY(cd.variableLength)) {
     648        if (LLVM_UNLIKELY(cd.hasVariableLength())) {
     649            mHasVariableLengthCarryData = true;
    708650            carryState = StructType::get(iBuilder->getSizeTy(), carryState->getPointerTo(), nullptr);
    709651        }
     
    730672, mIfDepth(0)
    731673, mHasLongAdvance(false)
     674, mHasVariableLengthCarryData(false)
    732675, mHasLoop(false)
    733676, mLoopDepth(0)
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.h

    r5340 r5347  
    4646    void initializeCarryData(PabloKernel * const kernel);
    4747
    48     void allocateCarryData(PabloKernel * const kernels);
    49 
    5048    void initializeCodeGen();
    5149
     
    8583
    8684    static unsigned getScopeCount(PabloBlock * const scope, unsigned index = 0);
     85
    8786    static bool requiresVariableLengthMode(const PabloBlock * const scope);
     87
    8888    llvm::StructType * analyse(PabloBlock * const scope, const unsigned ifDepth = 0, const unsigned whileDepth = 0);
    8989
     
    123123    bool                                            mHasLongAdvance;
    124124
     125    bool                                            mHasVariableLengthCarryData;
     126
    125127    bool                                            mHasLoop;
    126128    unsigned                                        mLoopDepth;
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r5342 r5347  
    6161    examineBlock(mKernel->getEntryBlock());
    6262    mCarryManager->initializeCarryData(mKernel);
    63 }
    64 
    65 void PabloCompiler::allocateKernelData() {
    66     mCarryManager->allocateCarryData(mKernel);
    6763}
    6864
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r5340 r5347  
    3838    void initializeKernelData();
    3939
    40     void allocateKernelData();
    41 
    4240    void compile();
    4341
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5340 r5347  
    116116}
    117117
    118 void PabloKernel::generateInitMethod() {
    119     mPabloCompiler->allocateKernelData();
    120 }
    121 
    122118void PabloKernel::generateDoBlockMethod() {
    123119    mPabloCompiler->compile();
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.h

    r5340 r5347  
    131131    void prepareKernel()  override final;
    132132
    133     void generateInitMethod() override final;
    134 
    135133    void generateDoBlockMethod() override final;
    136134
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r5314 r5347  
    3030
    3131static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
     32
    3233static cl::bits<DebugFlags>
    3334DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
     
    6061int BufferSegments;
    6162int ThreadNum;
     63bool EnableAsserts;
    6264
    6365static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
     
    6567static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
    6668static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
     69
     70static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(
     71#ifndef NDEBUG
     72true
     73#else
     74false
     75#endif
     76));
    6777
    6878const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
  • icGREP/icgrep-devel/icgrep/toolchain.h

    r5314 r5347  
    3232extern int BufferSegments;
    3333extern int ThreadNum;
     34extern bool EnableAsserts;
    3435#ifdef CUDA_ENABLED
    3536extern bool NVPTX;
Note: See TracChangeset for help on using the changeset viewer.