Changeset 5424


Ignore:
Timestamp:
Apr 25, 2017, 12:27:24 PM (2 years ago)
Author:
cameron
Message:

ShowKernelCycles option

Location:
icGREP/icgrep-devel/icgrep
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5422 r5424  
    727727}
    728728
     729Value * CBuilder::CreateReadCycleCounter() {
     730    Value * cycleCountFunc = Intrinsic::getDeclaration(mMod, Intrinsic::readcyclecounter);
     731    return CreateCall(cycleCountFunc, std::vector<Value *>({}));
     732}
     733
     734
    729735CBuilder::CBuilder(Module * const m, const unsigned GeneralRegisterWidthInBits, const bool SupportsIndirectBr, const unsigned CacheLineAlignmentInBytes)
    730736: IRBuilder<>(m->getContext())
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5422 r5424  
    177177
    178178    llvm::Value * CreateCeilLog2(llvm::Value * value);
     179   
     180    llvm::Value * CreateReadCycleCounter();
    179181
    180182protected:
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5418 r5424  
    439439    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    440440    Function * main = entryBlock->getParent();
     441    Value * mCycleCounts = nullptr;
     442    if (codegen::EnableCycleCounter) {
     443        ArrayType * cycleCountArray = ArrayType::get(iBuilder->getInt64Ty(), kernels.size());
     444        mCycleCounts = iBuilder->CreateAlloca(ArrayType::get(iBuilder->getInt64Ty(), kernels.size()));
     445        iBuilder->CreateStore(Constant::getNullValue(cycleCountArray), mCycleCounts);
     446    }
    441447
    442448    // Create the basic blocks for the loop.
     
    449455    iBuilder->CreateBr(pipelineLoop);
    450456    iBuilder->SetInsertPoint(pipelineLoop);
    451 
     457   
     458    Value * cycleCountStart = nullptr;
     459    Value * cycleCountEnd = nullptr;
     460    if (codegen::EnableCycleCounter) {
     461        cycleCountStart = iBuilder->CreateReadCycleCounter();
     462    }
    452463    Value * terminated = iBuilder->getFalse();
    453     for (auto & kernel : kernels) {
     464    for (unsigned k = 0; k < kernels.size(); k++) {
     465        auto & kernel = kernels[k];
    454466
    455467        const auto & inputs = kernel->getStreamInputs();
     
    487499            }
    488500        }
    489 
     501        if (codegen::EnableCycleCounter) {
     502            cycleCountEnd = iBuilder->CreateReadCycleCounter();
     503            Value * counterPtr = iBuilder->CreateGEP(mCycleCounts, {iBuilder->getInt32(0), iBuilder->getInt32(k)});
     504            iBuilder->CreateStore(iBuilder->CreateAdd(iBuilder->CreateLoad(counterPtr), iBuilder->CreateSub(cycleCountEnd, cycleCountStart)), counterPtr);
     505            cycleCountStart = cycleCountEnd;
     506        }
    490507        Value * const segNo = kernel->acquireLogicalSegmentNo();
    491508        kernel->releaseLogicalSegmentNo(iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
     
    506523    iBuilder->CreateCondBr(terminated, pipelineExit, pipelineLoop);
    507524    iBuilder->SetInsertPoint(pipelineExit);
     525    if (codegen::EnableCycleCounter) {
     526        for (unsigned k = 0; k < kernels.size(); k++) {
     527            auto & kernel = kernels[k];
     528            const auto & inputs = kernel->getStreamInputs();
     529            const auto & outputs = kernel->getStreamOutputs();
     530            Value * items = inputs.size() > 0 ? kernel->getProcessedItemCount(inputs[0].name) : kernel->getProducedItemCount(outputs[0].name);
     531            Value * fItems = iBuilder->CreateUIToFP(items, iBuilder->getDoubleTy());
     532            Value * cycles = iBuilder->CreateLoad(iBuilder->CreateGEP(mCycleCounts, {iBuilder->getInt32(0), iBuilder->getInt32(k)}));
     533            Value * fCycles = iBuilder->CreateUIToFP(cycles, iBuilder->getDoubleTy());
     534            std::string formatString = kernel->getName() + ": %7.2e items processed; %7.2e CPU cycles,  %6.2f cycles per item.\n";
     535            Value * stringPtr = iBuilder->CreatePointerCast(iBuilder->CreateGlobalString(formatString.c_str()), iBuilder->getInt8PtrTy());
     536            iBuilder->CreateCall(iBuilder->GetDprintf(), {iBuilder->getInt32(2), stringPtr, fItems, fCycles, iBuilder->CreateFDiv(fCycles, fItems)});
     537        }
     538    }
    508539}
  • icGREP/icgrep-devel/icgrep/kernels/toolchain.cpp

    r5422 r5424  
    6969int ThreadNum;
    7070bool EnableAsserts;
     71bool EnableCycleCounter;
    7172#ifndef NDEBUG
    7273#define DEFAULT_TO_TRUE_IN_DEBUG_MODE true
     
    8081static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
    8182static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(DEFAULT_TO_TRUE_IN_DEBUG_MODE));
     83static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::desc("Count and report CPU cycles per kernel"), cl::init(false), cl::cat(CodeGenOptions));
    8284
    8385const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
  • icGREP/icgrep-devel/icgrep/kernels/toolchain.h

    r5418 r5424  
    4545extern int ThreadNum;
    4646extern bool EnableAsserts;
     47extern bool EnableCycleCounter;
    4748#ifdef CUDA_ENABLED
    4849extern bool NVPTX;
Note: See TracChangeset for help on using the changeset viewer.