Ignore:
Timestamp:
Feb 26, 2017, 4:30:51 PM (2 years ago)
Author:
nmedfort
Message:

First attempt at inlining all DoBlock and FinalBlock functions by using indirect jumps. Disabled for NVPTX until Linda can check whether indirect jumps are supported by the LLVM NVPTX library.

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5347 r5350  
    1010#include <llvm/IR/Function.h>
    1111#include <llvm/IR/Instructions.h>
     12#include <llvm/IR/MDBuilder.h>
    1213#include <llvm/IR/Module.h>
     14#include <llvm/IR/Verifier.h>
    1315#include <llvm/Support/raw_ostream.h>
    14 #include <llvm/IR/LegacyPassManager.h>
    15 #include <llvm/Transforms/Scalar.h>
    16 #include <llvm/IR/Verifier.h>
     16#include <llvm/Transforms/Utils/Local.h>
    1717
    1818static const auto DO_BLOCK_SUFFIX = "_DoBlock";
     
    3535using namespace kernel;
    3636using namespace parabix;
    37 using namespace llvm::legacy;
    3837
    3938unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
     
    440439void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
    441440
    442     // Use the pass manager to optimize the function.
    443     FunctionPassManager fpm(iBuilder->getModule());
    444     #ifndef NDEBUG
    445     fpm.add(createVerifierPass());
    446     #endif
    447     fpm.add(createReassociatePass());             //Reassociate expressions.
    448     fpm.add(createGVNPass());                     //Eliminate common subexpressions.
    449     fpm.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    450     fpm.doInitialization();
    451 
    452441    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
    453442    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
    454     BasicBlock * const strideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
     443    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
    455444    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
    456445
    457446    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
    458 
    459447    Value * availablePos = producerPos[0];
    460448    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
     
    466454    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
    467455    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
     456
    468457    iBuilder->CreateBr(strideLoopCond);
    469458
     
    472461    stridesRemaining->addIncoming(stridesToDo, entryBlock);
    473462    Value * notDone = iBuilder->CreateICmpNE(stridesRemaining, iBuilder->getSize(0));
    474     iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
    475 
    476     iBuilder->SetInsertPoint(strideLoopBody);
     463    iBuilder->CreateCondBr(notDone, mStrideLoopBody, stridesDone);
     464
     465    iBuilder->SetInsertPoint(mStrideLoopBody);
     466
     467    if (useIndirectBr()) {
     468        mStrideLoopBranchAddress = iBuilder->CreatePHI(iBuilder->getInt8PtrTy(), 2);
     469        mStrideLoopBranchAddress->addIncoming(BlockAddress::get(strideLoopCond), strideLoopCond);       
     470    }
    477471
    478472    /// GENERATE DO BLOCK METHOD
    479473
    480     generateDoBlockMethod(fpm);
     474    writeDoBlockMethod();
    481475
    482476    /// UPDATE PROCESSED COUNTS
     
    487481
    488482    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
    489     iBuilder->CreateBr(strideLoopCond);
     483
     484    if (useIndirectBr()) {
     485        mStrideLoopBranch = iBuilder->CreateIndirectBr(mStrideLoopBranchAddress, 2);
     486        mStrideLoopBranch->addDestination(strideLoopCond);
     487    } else {
     488        iBuilder->CreateBr(strideLoopCond);
     489    }
    490490
    491491    iBuilder->SetInsertPoint(stridesDone);
     
    498498
    499499    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
    500     generateFinalBlockMethod(remainingItems, fpm);
     500    writeFinalBlockMethod(remainingItems);
     501    // if remainingItems was not used, this will eliminate it.
     502    RecursivelyDeleteTriviallyDeadInstructions(remainingItems);
    501503
    502504    itemsDone = producerPos[0];
     
    505507    iBuilder->CreateBr(segmentDone);
    506508
     509    if (useIndirectBr()) {
     510        const auto destinations = mStrideLoopBranch->getNumDestinations();
     511        assert (mStrideLoopBranchAddress->getNumIncomingValues() == destinations);
     512        if (destinations == 1) {
     513            // Final block does not call DoBlock. Replace the indirect branch with a direct one.
     514            iBuilder->SetInsertPoint(mStrideLoopBranch);
     515            iBuilder->CreateBr(strideLoopCond);
     516            mStrideLoopBranch->eraseFromParent();
     517            mStrideLoopBranch = nullptr;
     518            mStrideLoopBranchAddress->eraseFromParent();
     519            mStrideLoopBranchAddress = nullptr;
     520        } else {
     521            MDBuilder mdb(iBuilder->getContext());
     522            uint32_t weights[destinations] = { 100, 0 };
     523            ArrayRef<uint32_t> bw(weights, destinations);
     524            mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
     525        }
     526    }
     527
     528    segmentDone->moveAfter(iBuilder->GetInsertBlock());
     529
    507530    iBuilder->SetInsertPoint(segmentDone);
    508 }
    509 
    510 void BlockOrientedKernel::generateDoBlockMethod(FunctionPassManager & fpm) {
     531
     532}
     533
     534void BlockOrientedKernel::writeDoBlockMethod() {
    511535
    512536    Value * const self = mSelf;
     
    515539
    516540    /// Check if the do block method is called and create the function if necessary   
    517     if (isCalled()) {
     541    if (!useIndirectBr()) {
    518542        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
    519543        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
     
    527551        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
    528552    }
    529 
    530     writeDoBlockMethod();
    531 
    532     /// Call the do block method if necessary then restore the current function state to the do segement method
    533 
    534     if (isCalled()) {
    535         iBuilder->CreateRetVoid();
    536         mDoBlockMethod = mCurrentMethod;
    537         fpm.run(*mCurrentMethod);
    538         iBuilder->restoreIP(ip);
    539         iBuilder->CreateCall(mCurrentMethod, self);
    540 
    541         mSelf = self;
    542         mCurrentMethod = cp;
    543     }
    544 
    545 }
    546 
    547 void BlockOrientedKernel::writeDoBlockMethod() {
    548553
    549554    std::vector<Value *> priorProduced;
     
    574579    }
    575580
    576 }
    577 
    578 void BlockOrientedKernel::generateFinalBlockMethod(Value * remainingItems, FunctionPassManager & fpm) {
     581    /// Call the do block method if necessary then restore the current function state to the do segement method
     582
     583    if (!useIndirectBr()) {
     584        iBuilder->CreateRetVoid();
     585        mDoBlockMethod = mCurrentMethod;
     586        iBuilder->restoreIP(ip);
     587        iBuilder->CreateCall(mCurrentMethod, self);
     588        mSelf = self;
     589        mCurrentMethod = cp;
     590    }
     591
     592}
     593
     594void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
    579595
    580596    Value * const self = mSelf;
     
    583599    auto ip = iBuilder->saveIP();
    584600
    585     if (isCalled()) {
     601    if (!useIndirectBr()) {
    586602        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
    587603        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
     
    599615    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernelBuilder subtype
    600616
    601     if (isCalled()) {
     617    if (!useIndirectBr()) {
    602618        iBuilder->CreateRetVoid();       
    603         fpm.run(*mCurrentMethod);
    604619        iBuilder->restoreIP(ip);
    605620        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
     
    616631
    617632void BlockOrientedKernel::CreateDoBlockMethodCall() {
    618     if (isCalled()) {
     633    if (useIndirectBr()) {
     634        BasicBlock * bb = CreateBasicBlock("resume");
     635        mStrideLoopBranch->addDestination(bb);
     636        mStrideLoopBranchAddress->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
     637        iBuilder->CreateBr(mStrideLoopBody);
     638        iBuilder->SetInsertPoint(bb);
     639    } else {
    619640        iBuilder->CreateCall(mDoBlockMethod, mSelf);
    620     } else {
    621         // TODO: can we clone the DoBlock method instead of regenerating it?
    622         writeDoBlockMethod();
    623641    }
    624642}
     
    635653: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
    636654, mDoBlockMethod(nullptr)
    637 , mInlined(false) {
     655, mStrideLoopBody(nullptr)
     656, mStrideLoopBranch(nullptr)
     657, mStrideLoopBranchAddress(nullptr) {
    638658
    639659}
     
    649669                             std::vector<Binding> && internal_scalars)
    650670: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
     671, mSelf(nullptr)
     672, mCurrentMethod(nullptr)
    651673, mNoTerminateAttribute(false)
    652674, mDoBlockUpdatesProducedItemCountsAttribute(false) {
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5347 r5350  
    1212#include <boost/container/flat_map.hpp>
    1313#include <IR_Gen/idisa_builder.h>
    14 #include "llvm/Support/Debug.h"
     14
    1515namespace llvm { class ConstantInt; }
    1616namespace llvm { class Function; }
    17 namespace llvm { namespace legacy { class FunctionPassManager; } }
    1817namespace llvm { class IntegerType; }
    1918namespace llvm { class LoadInst; }
     
    138137    virtual void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) = 0;
    139138
    140 //    virtual void generateInternalMethods() { }
    141 
    142139    // Add an additional scalar field to the KernelState struct.
    143140    // Must occur before any call to addKernelDeclarations or createKernelModule.
     
    323320    virtual ~BlockOrientedKernel() { }
    324321
    325     bool isCalled() const {
    326         return !mInlined;
    327     }
    328 
    329     bool isInlined() const {
    330         return mInlined;
    331     }
    332 
    333     void setInlined(const bool value = true) {
    334         mInlined = value;
    335     }
    336 
    337322private:
    338323
    339     void generateDoBlockMethod(llvm::legacy::FunctionPassManager & fpm);
     324    bool useIndirectBr() const {
     325        return iBuilder->supportsIndirectBr();
     326    }
    340327
    341328    void writeDoBlockMethod();
    342329
    343     void generateFinalBlockMethod(llvm::Value *remainingItems, llvm::legacy::FunctionPassManager & fpm);
     330    void writeFinalBlockMethod(llvm::Value * remainingItems);
    344331
    345332private:
    346333
    347     llvm::Function * mDoBlockMethod;
    348     bool             mInlined;
     334    llvm::Function *        mDoBlockMethod;
     335    llvm::BasicBlock *      mStrideLoopBody;
     336    llvm::IndirectBrInst *  mStrideLoopBranch;
     337    llvm::PHINode *         mStrideLoopBranchAddress;
    349338};
    350339
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5347 r5350  
    166166    setNoTerminateAttribute(true);
    167167    setDoBlockUpdatesProducedItemCountsAttribute(false);
    168     setInlined(true);
    169168}
    170169
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5347 r5350  
    254254    {Binding{iBuilder->getSizeTy(), "BlockNo"}, Binding{iBuilder->getSizeTy(), "LineStart"}, Binding{iBuilder->getSizeTy(), "LineNum"}})
    255255, mGrepType(grepType) {
    256     setInlined(true);
    257 }
    258 
    259 }
     256}
     257
     258}
  • icGREP/icgrep-devel/icgrep/kernels/unicode_linebreak_kernel.cpp

    r5347 r5350  
    6969    Var * const r = addOutput("unicodeLineBreak", getStreamTy());
    7070    builder.createAssign(r, UnicodeLineBreak);
    71 
    72     setInlined(true);
    7371}
Note: See TracChangeset for help on using the changeset viewer.