Ignore:
Timestamp:
Feb 26, 2017, 4:30:51 PM (2 years ago)
Author:
nmedfort
Message:

First attempt at inlining all DoBlock and FinalBlock functions by using indirect jumps. Disabled for NVPTX until Linda can check whether they're supported by the LLVM NVPTX library.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5347 r5350  
    1010#include <llvm/IR/Function.h>
    1111#include <llvm/IR/Instructions.h>
     12#include <llvm/IR/MDBuilder.h>
    1213#include <llvm/IR/Module.h>
     14#include <llvm/IR/Verifier.h>
    1315#include <llvm/Support/raw_ostream.h>
    14 #include <llvm/IR/LegacyPassManager.h>
    15 #include <llvm/Transforms/Scalar.h>
    16 #include <llvm/IR/Verifier.h>
     16#include <llvm/Transforms/Utils/Local.h>
    1717
    1818static const auto DO_BLOCK_SUFFIX = "_DoBlock";
     
    3535using namespace kernel;
    3636using namespace parabix;
    37 using namespace llvm::legacy;
    3837
    3938unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
     
    440439void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
    441440
    442     // Use the pass manager to optimize the function.
    443     FunctionPassManager fpm(iBuilder->getModule());
    444     #ifndef NDEBUG
    445     fpm.add(createVerifierPass());
    446     #endif
    447     fpm.add(createReassociatePass());             //Reassociate expressions.
    448     fpm.add(createGVNPass());                     //Eliminate common subexpressions.
    449     fpm.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    450     fpm.doInitialization();
    451 
    452441    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
    453442    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
    454     BasicBlock * const strideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
     443    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
    455444    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
    456445
    457446    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
    458 
    459447    Value * availablePos = producerPos[0];
    460448    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
     
    466454    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
    467455    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
     456
    468457    iBuilder->CreateBr(strideLoopCond);
    469458
     
    472461    stridesRemaining->addIncoming(stridesToDo, entryBlock);
    473462    Value * notDone = iBuilder->CreateICmpNE(stridesRemaining, iBuilder->getSize(0));
    474     iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
    475 
    476     iBuilder->SetInsertPoint(strideLoopBody);
     463    iBuilder->CreateCondBr(notDone, mStrideLoopBody, stridesDone);
     464
     465    iBuilder->SetInsertPoint(mStrideLoopBody);
     466
     467    if (useIndirectBr()) {
     468        mStrideLoopBranchAddress = iBuilder->CreatePHI(iBuilder->getInt8PtrTy(), 2);
     469        mStrideLoopBranchAddress->addIncoming(BlockAddress::get(strideLoopCond), strideLoopCond);       
     470    }
    477471
    478472    /// GENERATE DO BLOCK METHOD
    479473
    480     generateDoBlockMethod(fpm);
     474    writeDoBlockMethod();
    481475
    482476    /// UPDATE PROCESSED COUNTS
     
    487481
    488482    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
    489     iBuilder->CreateBr(strideLoopCond);
     483
     484    if (useIndirectBr()) {
     485        mStrideLoopBranch = iBuilder->CreateIndirectBr(mStrideLoopBranchAddress, 2);
     486        mStrideLoopBranch->addDestination(strideLoopCond);
     487    } else {
     488        iBuilder->CreateBr(strideLoopCond);
     489    }
    490490
    491491    iBuilder->SetInsertPoint(stridesDone);
     
    498498
    499499    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
    500     generateFinalBlockMethod(remainingItems, fpm);
     500    writeFinalBlockMethod(remainingItems);
     501    // if remainingItems was not used, this will eliminate it.
     502    RecursivelyDeleteTriviallyDeadInstructions(remainingItems);
    501503
    502504    itemsDone = producerPos[0];
     
    505507    iBuilder->CreateBr(segmentDone);
    506508
     509    if (useIndirectBr()) {
     510        const auto destinations = mStrideLoopBranch->getNumDestinations();
     511        assert (mStrideLoopBranchAddress->getNumIncomingValues() == destinations);
     512        if (destinations == 1) {
     513            // Final block does not call DoBlock. Replace the indirect branch with a direct one.
     514            iBuilder->SetInsertPoint(mStrideLoopBranch);
     515            iBuilder->CreateBr(strideLoopCond);
     516            mStrideLoopBranch->eraseFromParent();
     517            mStrideLoopBranch = nullptr;
     518            mStrideLoopBranchAddress->eraseFromParent();
     519            mStrideLoopBranchAddress = nullptr;
     520        } else {
     521            MDBuilder mdb(iBuilder->getContext());
     522            uint32_t weights[destinations] = { 100, 0 };
     523            ArrayRef<uint32_t> bw(weights, destinations);
     524            mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
     525        }
     526    }
     527
     528    segmentDone->moveAfter(iBuilder->GetInsertBlock());
     529
    507530    iBuilder->SetInsertPoint(segmentDone);
    508 }
    509 
    510 void BlockOrientedKernel::generateDoBlockMethod(FunctionPassManager & fpm) {
     531
     532}
     533
     534void BlockOrientedKernel::writeDoBlockMethod() {
    511535
    512536    Value * const self = mSelf;
     
    515539
    516540    /// Check if the do block method is called and create the function if necessary   
    517     if (isCalled()) {
     541    if (!useIndirectBr()) {
    518542        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
    519543        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
     
    527551        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
    528552    }
    529 
    530     writeDoBlockMethod();
    531 
    532     /// Call the do block method if necessary then restore the current function state to the do segement method
    533 
    534     if (isCalled()) {
    535         iBuilder->CreateRetVoid();
    536         mDoBlockMethod = mCurrentMethod;
    537         fpm.run(*mCurrentMethod);
    538         iBuilder->restoreIP(ip);
    539         iBuilder->CreateCall(mCurrentMethod, self);
    540 
    541         mSelf = self;
    542         mCurrentMethod = cp;
    543     }
    544 
    545 }
    546 
    547 void BlockOrientedKernel::writeDoBlockMethod() {
    548553
    549554    std::vector<Value *> priorProduced;
     
    574579    }
    575580
    576 }
    577 
    578 void BlockOrientedKernel::generateFinalBlockMethod(Value * remainingItems, FunctionPassManager & fpm) {
     581    /// Call the do block method if necessary then restore the current function state to the do segement method
     582
     583    if (!useIndirectBr()) {
     584        iBuilder->CreateRetVoid();
     585        mDoBlockMethod = mCurrentMethod;
     586        iBuilder->restoreIP(ip);
     587        iBuilder->CreateCall(mCurrentMethod, self);
     588        mSelf = self;
     589        mCurrentMethod = cp;
     590    }
     591
     592}
     593
     594void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
    579595
    580596    Value * const self = mSelf;
     
    583599    auto ip = iBuilder->saveIP();
    584600
    585     if (isCalled()) {
     601    if (!useIndirectBr()) {
    586602        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
    587603        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
     
    599615    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernelBuilder subtype
    600616
    601     if (isCalled()) {
     617    if (!useIndirectBr()) {
    602618        iBuilder->CreateRetVoid();       
    603         fpm.run(*mCurrentMethod);
    604619        iBuilder->restoreIP(ip);
    605620        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
     
    616631
    617632void BlockOrientedKernel::CreateDoBlockMethodCall() {
    618     if (isCalled()) {
     633    if (useIndirectBr()) {
     634        BasicBlock * bb = CreateBasicBlock("resume");
     635        mStrideLoopBranch->addDestination(bb);
     636        mStrideLoopBranchAddress->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
     637        iBuilder->CreateBr(mStrideLoopBody);
     638        iBuilder->SetInsertPoint(bb);
     639    } else {
    619640        iBuilder->CreateCall(mDoBlockMethod, mSelf);
    620     } else {
    621         // TODO: can we clone the DoBlock method instead of regenerating it?
    622         writeDoBlockMethod();
    623641    }
    624642}
     
    635653: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
    636654, mDoBlockMethod(nullptr)
    637 , mInlined(false) {
     655, mStrideLoopBody(nullptr)
     656, mStrideLoopBranch(nullptr)
     657, mStrideLoopBranchAddress(nullptr) {
    638658
    639659}
     
    649669                             std::vector<Binding> && internal_scalars)
    650670: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
     671, mSelf(nullptr)
     672, mCurrentMethod(nullptr)
    651673, mNoTerminateAttribute(false)
    652674, mDoBlockUpdatesProducedItemCountsAttribute(false) {
Note: See TracChangeset for help on using the changeset viewer.