Ignore:
Timestamp:
Oct 25, 2017, 4:57:58 PM (21 months ago)
Author:
nmedfort
Message:

First stage of MultiBlockKernel and pipeline restructuring

Location:
icGREP/icgrep-devel/icgrep/toolchain
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/toolchain/cpudriver.cpp

    r5646 r5706  
    106106
    107107    // note: instantiation of all kernels must occur prior to initialization
    108     for (const auto & k : mPipeline) {
     108    for (Kernel * const k : mPipeline) {
    109109        k->addKernelDeclarations(iBuilder);
    110110    }
    111     for (const auto & k : mPipeline) {
     111    for (Kernel * const k : mPipeline) {
    112112        k->createInstance(iBuilder);
    113113    }
    114     for (const auto & k : mPipeline) {
     114    for (Kernel * const k : mPipeline) {
    115115        k->initializeInstance(iBuilder);
    116116    }
  • icGREP/icgrep-devel/icgrep/toolchain/grep_pipeline.h

    r5695 r5706  
    1616class MatchAccumulator {
    1717public:
    18     MatchAccumulator() {};
     18    MatchAccumulator() {}
    1919    virtual void accumulate_match(const size_t lineNum, char * line_start, char * line_end) = 0;
    2020    virtual void finalize_match(char * buffer_end) {}  // default: no op
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r5615 r5706  
    1111#include <boost/container/flat_set.hpp>
    1212#include <boost/container/flat_map.hpp>
    13 #include <llvm/Support/CommandLine.h>
    1413#include <kernels/kernel_builder.h>
    15 
    16 #include <llvm/Support/raw_ostream.h>
    1714
    1815using namespace kernel;
     
    2017using namespace llvm;
    2118
    22 // static cl::opt<bool> UseYield("yield", cl::desc("yield after waiting"), cl::init(false));
     19using Port = Kernel::Port;
    2320
    2421template <typename Value>
     
    3431    return f;
    3532}
     33
     34void applyOutputBufferExpansions(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel);
    3635
    3736/** ------------------------------------------------------------------------------------------------------------- *
     
    111110
    112111        BasicBlock * segmentYield = segmentWait;
    113 //        if (UseYield) {
    114 //            segmentYield = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Yield", threadFunc);
    115 //        }
    116 
    117112        iBuilder->CreateBr(segmentWait);
    118113
     
    144139            iBuilder->CreateBr(exitThreadBlock);
    145140        }
    146 
    147 //        if (UseYield) {
    148 //            // Yield the thread after waiting
    149 //            iBuilder->SetInsertPoint(segmentYield);
    150 //            iBuilder->CreatePThreadYield();
    151 //            iBuilder->CreateBr(segmentWait);
    152 //        }
    153141
    154142        // Execute the kernel segment
     
    170158        const auto & outputs = kernel->getStreamOutputs();
    171159        for (unsigned i = 0; i < outputs.size(); ++i) {           
    172             Value * const produced = iBuilder->getProducedItemCount(outputs[i].name, terminated);
     160            Value * const produced = iBuilder->getProducedItemCount(outputs[i].getName()); // terminated
    173161            const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(i);
    174162            assert (producedPos.count(buf) == 0);
     
    176164        }
    177165        for (unsigned i = 0; i < inputs.size(); ++i) {
    178             Value * const processedItemCount = iBuilder->getProcessedItemCount(inputs[i].name);
     166            Value * const processedItemCount = iBuilder->getProcessedItemCount(inputs[i].getName());
    179167            const StreamSetBuffer * const buf = kernel->getStreamSetInputBuffer(i);           
    180168            auto f = consumedPos.find(buf);
     
    188176        if (codegen::EnableCycleCounter) {
    189177            cycleCountEnd = iBuilder->CreateReadCycleCounter();
    190             Value * counterPtr = iBuilder->getScalarFieldPtr(Kernel::CYCLECOUNT_SCALAR);
     178            Value * counterPtr = iBuilder->getCycleCountPtr();
    191179            iBuilder->CreateStore(iBuilder->CreateAdd(iBuilder->CreateLoad(counterPtr), iBuilder->CreateSub(cycleCountEnd, cycleCountStart)), counterPtr);
    192180            cycleCountStart = cycleCountEnd;
     
    201189    for (const auto consumed : consumedPos) {
    202190        const StreamSetBuffer * const buf = consumed.first;
    203         Kernel * kernel = buf->getProducer();
    204         const auto & outputs = kernel->getStreamSetOutputBuffers();
     191        Kernel * const k = buf->getProducer();
     192        const auto & outputs = k->getStreamSetOutputBuffers();
    205193        for (unsigned i = 0; i < outputs.size(); ++i) {
    206194            if (outputs[i] == buf) {
    207                 iBuilder->setKernel(kernel);
    208                 iBuilder->setConsumedItemCount(kernel->getStreamOutput(i).name, consumed.second);
     195                const auto binding = k->getStreamOutput(i);
     196                if (LLVM_UNLIKELY(binding.getRate().isDerived())) {
     197                    continue;
     198                }
     199                iBuilder->setKernel(k);
     200                iBuilder->setConsumedItemCount(binding.getName(), consumed.second);
    209201                break;
    210202            }
     
    287279            Value * items = nullptr;
    288280            if (inputs.empty()) {
    289                 items = iBuilder->getProducedItemCount(outputs[0].name);
     281                items = iBuilder->getProducedItemCount(outputs[0].getName());
    290282            } else {
    291                 items = iBuilder->getProcessedItemCount(inputs[0].name);
     283                items = iBuilder->getProcessedItemCount(inputs[0].getName());
    292284            }
    293285            Value * fItems = iBuilder->CreateUIToFP(items, iBuilder->getDoubleTy());
    294             Value * cycles = iBuilder->CreateLoad(iBuilder->getScalarFieldPtr(Kernel::CYCLECOUNT_SCALAR));
     286            Value * cycles = iBuilder->CreateLoad(iBuilder->getCycleCountPtr());
    295287            Value * fCycles = iBuilder->CreateUIToFP(cycles, iBuilder->getDoubleTy());
    296288            std::string formatString = kernel->getName() + ": %7.2e items processed; %7.2e CPU cycles,  %6.2f cycles per item.\n";
     
    356348            const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(j);
    357349            if (LLVM_UNLIKELY(producingKernel.count(buf) != 0)) {
    358                 report_fatal_error(kernel->getName() + " redefines stream set " + outputs[j].name);
     350                report_fatal_error(kernel->getName() + " redefines stream set " + outputs[j].getName());
    359351            }
    360352            producingKernel.emplace(buf, id);
     
    366358            if (f == consumingKernels.end()) {
    367359                if (LLVM_UNLIKELY(producingKernel.count(buf) == 0)) {
    368                     report_fatal_error(kernel->getName() + " uses stream set " + inputs[j].name + " prior to its definition");
     360                    report_fatal_error(kernel->getName() + " uses stream set " + inputs[j].getName() + " prior to its definition");
    369361                }
    370362                consumingKernels.emplace(buf, std::vector<unsigned>{ id });
     
    528520    }
    529521    Value * terminated = iBuilder->getFalse();
    530     for (unsigned k = 0; k < kernels.size(); k++) {
    531 
    532         auto & kernel = kernels[k];
     522
     523    for (Kernel * const kernel : kernels) {
    533524
    534525        iBuilder->setKernel(kernel);
     
    536527        const auto & outputs = kernel->getStreamOutputs();
    537528
    538         std::vector<Value *> inputAvail;
    539529        std::vector<Value *> args = {kernel->getInstance(), terminated};
    540        
     530
    541531        for (unsigned i = 0; i < inputs.size(); ++i) {
    542532            const auto f = producedPos.find(kernel->getStreamSetInputBuffer(i));
    543533            if (LLVM_UNLIKELY(f == producedPos.end())) {
    544                 report_fatal_error(kernel->getName() + " uses stream set " + inputs[i].name + " prior to its definition");
    545             }
    546             inputAvail.push_back(f->second);
     534                report_fatal_error(kernel->getName() + " uses stream set " + inputs[i].getName() + " prior to its definition");
     535            }
    547536            args.push_back(f->second);
    548537        }
    549         applyOutputBufferExpansions(iBuilder, inputAvail, terminated);
     538
     539        applyOutputBufferExpansions(iBuilder, kernel);
    550540
    551541        iBuilder->createDoSegmentCall(args);
     542
    552543        if (!kernel->hasNoTerminateAttribute()) {
    553544            Value * terminatedSignal = iBuilder->getTerminationSignal();
     
    555546        }
    556547        for (unsigned i = 0; i < outputs.size(); ++i) {
    557             Value * const produced = iBuilder->getProducedItemCount(outputs[i].name, terminated);
     548            Value * const produced = iBuilder->getProducedItemCount(outputs[i].getName()); // , terminated
    558549            const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(i);
    559550            assert (producedPos.count(buf) == 0);
     
    562553
    563554        for (unsigned i = 0; i < inputs.size(); ++i) {
    564             Value * const processedItemCount = iBuilder->getProcessedItemCount(inputs[i].name);
     555            Value * const processed = iBuilder->getProcessedItemCount(inputs[i].getName());
    565556            const StreamSetBuffer * const buf = kernel->getStreamSetInputBuffer(i);
    566557            auto f = consumedPos.find(buf);
    567558            if (f == consumedPos.end()) {
    568                 consumedPos.emplace(buf, processedItemCount);
     559                consumedPos.emplace(buf, processed);
    569560            } else {
    570                 Value * lesser = iBuilder->CreateICmpULT(processedItemCount, f->second);
    571                 f->second = iBuilder->CreateSelect(lesser, processedItemCount, f->second);
     561                Value * lesser = iBuilder->CreateICmpULT(processed, f->second);
     562                f->second = iBuilder->CreateSelect(lesser, processed, f->second);
    572563            }
    573564        }
    574565        if (codegen::EnableCycleCounter) {
    575566            cycleCountEnd = iBuilder->CreateReadCycleCounter();
    576             //Value * counterPtr = iBuilder->CreateGEP(mCycleCounts, {iBuilder->getInt32(0), iBuilder->getInt32(k)});
    577             Value * counterPtr = iBuilder->getScalarFieldPtr(Kernel::CYCLECOUNT_SCALAR);
     567            Value * counterPtr = iBuilder->getCycleCountPtr();
    578568            iBuilder->CreateStore(iBuilder->CreateAdd(iBuilder->CreateLoad(counterPtr), iBuilder->CreateSub(cycleCountEnd, cycleCountStart)), counterPtr);
    579569            cycleCountStart = cycleCountEnd;
     
    587577    for (const auto consumed : consumedPos) {
    588578        const StreamSetBuffer * const buf = consumed.first;
    589         Kernel * k = buf->getProducer();
     579        Kernel * const k = buf->getProducer();
    590580        const auto & outputs = k->getStreamSetOutputBuffers();
    591581        for (unsigned i = 0; i < outputs.size(); ++i) {
    592582            if (outputs[i] == buf) {
     583                const auto binding = k->getStreamOutput(i);
     584                if (LLVM_UNLIKELY(binding.getRate().isDerived())) {
     585                    continue;
     586                }
    593587                iBuilder->setKernel(k);
    594                 iBuilder->setConsumedItemCount(k->getStreamOutput(i).name, consumed.second);
     588                iBuilder->setConsumedItemCount(binding.getName(), consumed.second);
    595589                break;
    596590            }
     
    599593
    600594    iBuilder->CreateCondBr(terminated, pipelineExit, pipelineLoop);
     595
    601596    iBuilder->SetInsertPoint(pipelineExit);
     597
    602598    if (codegen::EnableCycleCounter) {
    603599        for (unsigned k = 0; k < kernels.size(); k++) {
     
    608604            Value * items = nullptr;
    609605            if (inputs.empty()) {
    610                 items = iBuilder->getProducedItemCount(outputs[0].name);
     606                items = iBuilder->getProducedItemCount(outputs[0].getName());
    611607            } else {
    612                 items = iBuilder->getProcessedItemCount(inputs[0].name);
     608                items = iBuilder->getProcessedItemCount(inputs[0].getName());
    613609            }
    614610            Value * fItems = iBuilder->CreateUIToFP(items, iBuilder->getDoubleTy());
    615             Value * cycles = iBuilder->CreateLoad(iBuilder->getScalarFieldPtr(Kernel::CYCLECOUNT_SCALAR));
     611            Value * cycles = iBuilder->CreateLoad(iBuilder->getCycleCountPtr());
    616612            Value * fCycles = iBuilder->CreateUIToFP(cycles, iBuilder->getDoubleTy());
    617613            std::string formatString = kernel->getName() + ": %7.2e items processed; %7.2e CPU cycles,  %6.2f cycles per item.\n";
     
    621617    }
    622618}
     619
     620void applyOutputBufferExpansions(const std::unique_ptr<KernelBuilder> & b, const std::string & name, DynamicBuffer * const db, const uint64_t l) {
     621
     622    BasicBlock * const doExpand = b->CreateBasicBlock(name + "Expand");
     623    BasicBlock * const nextBlock = b->GetInsertBlock()->getNextNode();
     624    doExpand->moveAfter(b->GetInsertBlock());
     625    BasicBlock * const bufferReady = b->CreateBasicBlock(name + "Ready");
     626    bufferReady->moveAfter(doExpand);
     627    if (nextBlock) nextBlock->moveAfter(bufferReady);
     628
     629    Value * const handle = db->getStreamSetHandle();
     630
     631    Value * const produced = b->getProducedItemCount(name);
     632    Value * const consumed = b->getConsumedItemCount(name);
     633    Value * const required = b->CreateAdd(b->CreateSub(produced, consumed), b->getSize(2 * l));
     634
     635    b->CreateCondBr(b->CreateICmpUGT(required, db->getCapacity(b.get(), handle)), doExpand, bufferReady);
     636
     637    b->SetInsertPoint(doExpand);
     638    db->doubleCapacity(b.get(), handle);
     639    // Ensure that capacity is sufficient by successive doubling, if necessary.
     640    b->CreateCondBr(b->CreateICmpUGT(required, db->getBufferedSize(b.get(), handle)), doExpand, bufferReady);
     641
     642    b->SetInsertPoint(bufferReady);
     643}
     644
     645inline const Binding & getBinding(const Kernel * k, const std::string & name) {
     646    Port port; unsigned index;
     647    std::tie(port, index) = k->getStreamPort(name);
     648    if (port == Port::Input) {
     649        return k->getStreamInput(index);
     650    } else {
     651        return k->getStreamOutput(index);
     652    }
     653}
     654
     655void applyOutputBufferExpansions(const std::unique_ptr<KernelBuilder> & b, const Kernel * k) {
     656    const auto & outputs = k->getStreamSetOutputBuffers();
     657    for (unsigned i = 0; i < outputs.size(); i++) {
     658        if (isa<DynamicBuffer>(outputs[i])) {
     659            const ProcessingRate & rate = k->getStreamOutput(i).getRate();
     660            if (rate.isFixed() || rate.isBounded()) {
     661                const auto & name = k->getStreamOutput(i).getName();
     662                const auto l = rate.getUpperBound() * k->getKernelStride();
     663                applyOutputBufferExpansions(b, name, cast<DynamicBuffer>(outputs[i]), l);
     664            } else if (rate.isExactlyRelative()) {
     665                const auto binding = getBinding(k, rate.getReference());
     666                const ProcessingRate & ref = binding.getRate();
     667                if (rate.isFixed() || rate.isBounded()) {
     668                    const auto & name = k->getStreamOutput(i).getName();
     669                    const auto l = (ref.getUpperBound() * rate.getNumerator() * k->getKernelStride() + rate.getDenominator() - 1) / rate.getDenominator();
     670                    applyOutputBufferExpansions(b, name, cast<DynamicBuffer>(outputs[i]), l);
     671                }
     672            }
     673        }
     674    }
     675}
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.h

    r5436 r5706  
    1313namespace kernel { class KernelBuilder; }
    1414
    15 void generateSegmentParallelPipeline(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, const std::vector<kernel::Kernel *> & kernels);
    16 void generateParallelPipeline(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, const std::vector<kernel::Kernel *> & kernels);
    17 void generatePipelineLoop(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, const std::vector<kernel::Kernel *> & kernels);
     15void generateSegmentParallelPipeline(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const std::vector<kernel::Kernel *> & kernels);
     16void generateParallelPipeline(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const std::vector<kernel::Kernel *> & kernels);
     17void generatePipelineLoop(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const std::vector<kernel::Kernel *> & kernels);
    1818
    1919#endif // PIPELINE_H
Note: See TracChangeset for help on using the changeset viewer.