Changeset 5390


Ignore:
Timestamp:
Apr 2, 2017, 4:17:51 PM (2 years ago)
Author:
nmedfort
Message:

Minor changes in preparation for incorporating a consumed stream set position into select kernels.

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5375 r5390  
    4343
    4444Value * ProcessingRate::CreateRatioCalculation(IDISA::IDISA_Builder * b, Value * principalInputItems, Value * doFinal) const {
    45     Type * T = principalInputItems->getType();
    4645    if (mKind == ProcessingRate::ProcessingRateKind::Fixed || mKind == ProcessingRate::ProcessingRateKind::Max) {
    47         Value * strmItems = (ratio_numerator == 1) ? principalInputItems : b->CreateMul(principalInputItems, ConstantInt::get(T, ratio_numerator));
    48         if (ratio_denominator == 1) return strmItems;
    49         return b->CreateUDiv(b->CreateAdd(ConstantInt::get(T, ratio_denominator - 1), strmItems), ConstantInt::get(T, ratio_denominator));
     46        if (ratio_numerator == 1) {
     47            return principalInputItems;
     48        }
     49        Type * const T = principalInputItems->getType();
     50        Constant * const numerator = ConstantInt::get(T, ratio_numerator);
     51        Constant * const denominator = ConstantInt::get(T, ratio_denominator);
     52        Constant * const denominatorLess1 = ConstantInt::get(T, ratio_denominator - 1);
     53        Value * strmItems = b->CreateMul(principalInputItems, numerator);
     54        return b->CreateUDiv(b->CreateAdd(denominatorLess1, strmItems), denominator);
    5055    }
    5156    if (mKind == ProcessingRate::ProcessingRateKind::RoundUp) {
    52         Constant * multiple = ConstantInt::get(T, ratio_denominator);
    53         Constant * multipleLess1 = ConstantInt::get(T, ratio_denominator - 1);
    54         return b->CreateMul(b->CreateUDiv(b->CreateAdd(principalInputItems, multipleLess1), multiple), multiple);
     57        Type * const T = principalInputItems->getType();
     58        Constant * const denominator = ConstantInt::get(T, ratio_denominator);
     59        Constant * const denominatorLess1 = ConstantInt::get(T, ratio_denominator - 1);
     60        return b->CreateMul(b->CreateUDiv(b->CreateAdd(principalInputItems, denominatorLess1), denominator), denominator);
    5561    }
    5662    if (mKind == ProcessingRate::ProcessingRateKind::Add1) {
    57         return b->CreateAdd(principalInputItems, b->CreateZExt(doFinal, principalInputItems->getType()));
    58     }
    59     return nullptr;
    60 }
    61 
    62 Value * ProcessingRate::CreateRatioCalculation(IDISA::IDISA_Builder * b, Value * principalInputItems) const {
    63     Type * T = principalInputItems->getType();
    64     if (mKind == ProcessingRate::ProcessingRateKind::Fixed || mKind == ProcessingRate::ProcessingRateKind::Max) {
    65         Value * strmItems = (ratio_numerator == 1) ? principalInputItems : b->CreateMul(principalInputItems, ConstantInt::get(T, ratio_numerator));
    66         if (ratio_denominator == 1) return strmItems;
    67         return b->CreateUDiv(b->CreateAdd(ConstantInt::get(T, ratio_denominator - 1), strmItems), ConstantInt::get(T, ratio_denominator));
    68     }
    69     if (mKind == ProcessingRate::ProcessingRateKind::RoundUp) {
    70         Constant * multiple = ConstantInt::get(T, ratio_denominator);
    71         Constant * multipleLess1 = ConstantInt::get(T, ratio_denominator - 1);
    72         return b->CreateMul(b->CreateUDiv(b->CreateAdd(principalInputItems, multipleLess1), multiple), multiple);
    73     }
    74     if (mKind == ProcessingRate::ProcessingRateKind::Add1) {
     63        if (doFinal) {
     64            Type * const T = principalInputItems->getType();
     65            principalInputItems = b->CreateAdd(principalInputItems, b->CreateZExt(doFinal, T));
     66        }
    7567        return principalInputItems;
    7668    }
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5375 r5390  
    4343    ProcessingRateKind getKind() const {return mKind;}
    4444    bool isExact() const {return (mKind == Fixed)||(mKind == RoundUp)||(mKind == Add1) ;}
    45     llvm::Value * CreateRatioCalculation(IDISA::IDISA_Builder * b, llvm::Value * principalInputItems, llvm::Value * doFinal) const;
    46     llvm::Value * CreateRatioCalculation(IDISA::IDISA_Builder * b, llvm::Value * principalInputItems) const;
     45    llvm::Value * CreateRatioCalculation(IDISA::IDISA_Builder * b, llvm::Value * principalInputItems, llvm::Value * doFinal = nullptr) const;
    4746    friend ProcessingRate FixedRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems, std::string referenceStreamSet);
    4847    friend ProcessingRate MaxRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems, std::string referenceStreamSet);
     
    117116    }
    118117
     118    virtual llvm::Value * getProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * doFinal = nullptr) const = 0;
     119
     120    virtual void setProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const = 0;
     121
     122    virtual llvm::Value * getConsumedItemCount(llvm::Value * instance, const std::string & name) const = 0;
     123
     124    virtual void setConsumedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const = 0;
     125
    119126    virtual llvm::Value * getProcessedItemCount(llvm::Value * instance, const std::string & name) const = 0;
    120127
    121128    virtual void setProcessedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const = 0;
    122 
    123 
    124 
    125     virtual void setProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const = 0;
    126129
    127130    virtual llvm::Value * getTerminationSignal(llvm::Value * instance) const = 0;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5389 r5390  
    2222
    2323static const auto PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";
     24
     25static const auto CONSUMED_ITEM_COUNT_SUFFIX = "_consumedItemCount";
    2426
    2527static const auto PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";
     
    243245    assert ("instance cannot be null!" && instance);
    244246    iBuilder->CreateStore(value, getScalarFieldPtr(instance, index));
    245 }
    246 
    247 Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
    248     assert ("instance cannot be null!" && instance);
    249     unsigned ssIdx = getStreamSetIndex(name);
    250     if (mStreamSetInputs[ssIdx].rate.isExact()) {
    251         std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
    252         if (refSet.empty()) {
    253             refSet = mStreamSetInputs[0].name;
    254         }
    255         Value * principalItemsProcessed = getScalarField(instance, refSet + PROCESSED_ITEM_COUNT_SUFFIX);
    256         return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
    257     }
    258     return getScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX);
    259247}
    260248
     
    285273}
    286274
    287 Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name) const {
     275llvm::Value * KernelBuilder::getConsumedItemCount(llvm::Value * instance, const std::string & name) const {
     276    assert ("instance cannot be null!" && instance);
     277    return getScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX);
     278}
     279
     280Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
    288281    assert ("instance cannot be null!" && instance);
    289282    unsigned ssIdx = getStreamSetIndex(name);
    290     std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
    291     if (mStreamSetOutputs[ssIdx].rate.isExact()) {
    292         std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
    293         std::string principalField;
     283    if (mStreamSetInputs[ssIdx].rate.isExact()) {
     284        std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
    294285        if (refSet.empty()) {
    295             principalField = mStreamSetInputs.empty() ? mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX : mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
    296         } else {
    297             unsigned pfIndex = getStreamSetIndex(refSet);
    298             if (mStreamSetInputs.size() > pfIndex && mStreamSetInputs[pfIndex].name == refSet) {
    299                principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
    300             } else {
    301                principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
    302             }
    303         }
    304         Value * principalItemsProcessed = getScalarField(instance, principalField);
    305         return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
    306     }
    307     return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
     286            refSet = mStreamSetInputs[0].name;
     287        }
     288        Value * principalItemsProcessed = getScalarField(instance, refSet + PROCESSED_ITEM_COUNT_SUFFIX);
     289        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
     290    }
     291    return getScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX);
     292}
     293
     294void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
     295    assert ("instance cannot be null!" && instance);
     296    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
     297}
     298
     299void KernelBuilder::setConsumedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
     300    assert ("instance cannot be null!" && instance);
     301    setScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX, value);
    308302}
    309303
     
    311305    assert ("instance cannot be null!" && instance);
    312306    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
    313 }
    314 
    315 void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
    316     assert ("instance cannot be null!" && instance);
    317     setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
    318307}
    319308
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5386 r5390  
    3838    void createInstance() override;
    3939
    40     virtual llvm::Value * getProcessedItemCount(llvm::Value * instance, const std::string & name) const override;
    41 
    42     virtual void setProcessedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const;
    43 
    44     llvm::Value * getProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * doFinal) const;
    45 
    46     llvm::Value * getProducedItemCount(llvm::Value * instance, const std::string & name) const;
    47 
    48     virtual void setProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const;
     40    llvm::Value * getProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * doFinal = nullptr) const final;
     41
     42    void setProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const final;
     43
     44    llvm::Value * getConsumedItemCount(llvm::Value * instance, const std::string & name) const final;
     45
     46    void setConsumedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const final;
     47
     48    llvm::Value * getProcessedItemCount(llvm::Value * instance, const std::string & name) const final;
     49
     50    void setProcessedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const final;
    4951
    5052    virtual void reserveBytes(llvm::Value * instance, const std::string & name, llvm::Value * requested) const;
     
    5254    bool hasNoTerminateAttribute() { return mNoTerminateAttribute;}
    5355   
    54     llvm::Value * getTerminationSignal(llvm::Value * instance) const override final;
    55 
    56     void setTerminationSignal(llvm::Value * instance) const override final;
     56    llvm::Value * getTerminationSignal(llvm::Value * instance) const final;
     57
     58    void setTerminationSignal(llvm::Value * instance) const final;
    5759
    5860    // Get the value of a scalar field for a given instance.
     
    208210    }
    209211
     212    inline llvm::Value * getProducedItemCount(const std::string & name) const {
     213        return getProducedItemCount(getSelf(), name);
     214    }
     215
     216    inline void setProducedItemCount(const std::string & name, llvm::Value * value) const {
     217        setProducedItemCount(getSelf(), name, value);
     218    }
     219
     220    inline llvm::Value * getConsumedItemCount(const std::string & name) const {
     221        return getConsumedItemCount(getSelf(), name);
     222    }
     223
     224    inline void setConsumedItemCount(const std::string & name, llvm::Value * value) const {
     225        setConsumedItemCount(getSelf(), name, value);
     226    }
     227
    210228    inline llvm::Value * getProcessedItemCount(const std::string & name) const {
    211229        return getProcessedItemCount(getSelf(), name);
     
    214232    inline void setProcessedItemCount(const std::string & name, llvm::Value * value) const {
    215233        setProcessedItemCount(getSelf(), name, value);
    216     }
    217 
    218     inline llvm::Value * getProducedItemCount(const std::string & name) const {
    219         return getProducedItemCount(getSelf(), name);
    220     }
    221 
    222     inline void setProducedItemCount(const std::string & name, llvm::Value * value) const {
    223         setProducedItemCount(getSelf(), name, value);
    224234    }
    225235
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5370 r5390  
    99#include <kernels/streamset.h>
    1010#include <llvm/IR/Module.h>
    11 #include <unordered_map>
     11#include <boost/container/flat_map.hpp>
    1212
    1313using namespace kernel;
     
    1515using namespace llvm;
    1616
    17 #include <iostream>
    18 
    1917using ProducerTable = std::vector<std::vector<std::pair<unsigned, unsigned>>>;
    2018
     19using ConsumerTable = std::vector<std::vector<std::vector<unsigned>>>;
     20
     21template <typename Value>
     22using StreamSetBufferMap = boost::container::flat_map<const StreamSetBuffer *, Value>;
     23
     24
    2125ProducerTable createProducerTable(const std::vector<KernelBuilder *> & kernels) {
     26    // map each output streamSet to its producing kernel and output index.
     27    StreamSetBufferMap<std::pair<unsigned, unsigned>> map;
     28    for (unsigned k = 0; k < kernels.size(); k++) {
     29        const auto & outputSets = kernels[k]->getStreamSetOutputBuffers();
     30        for (unsigned j = 0; j < outputSets.size(); j++) {
     31            map.emplace(outputSets[j], std::make_pair(k, j));
     32        }
     33    }
     34    // TODO: replace this with a sparse matrix? It would be easier to understand if the (i,j)-th element indicated that kernel i's input came from the j-th kernel.
    2235    ProducerTable producerTable(kernels.size());
    23    
    24     std::vector<std::vector<bool>> userTable(kernels.size());
    25    
    26     // First prepare a map from streamSet output buffers to their producing kernel and output index.
    27     std::unordered_map<const StreamSetBuffer *, std::pair<unsigned, unsigned>> bufferMap;
    28    
    29     for (unsigned k = 0; k < kernels.size(); k++) {
    30         auto outputSets = kernels[k]->getStreamSetOutputBuffers();
    31         for (unsigned j = 0; j < outputSets.size(); j++) {
    32             userTable[k].push_back(false);
    33             bufferMap.insert(std::make_pair(outputSets[j], std::make_pair(k, j)));
    34         }
    35     }
    36     for (unsigned k = 0; k < kernels.size(); k++) {
    37         auto inputSets = kernels[k]->getStreamSetInputBuffers();
     36    for (unsigned k = 0; k < kernels.size(); k++) {
     37        const KernelBuilder * const kernel = kernels[k];
     38        const auto & inputSets = kernel->getStreamSetInputBuffers();
    3839        for (unsigned i = 0; i < inputSets.size(); i++) {
    39             auto f = bufferMap.find(inputSets[i]);
    40             if (f == bufferMap.end()) {
    41                 llvm::report_fatal_error("Pipeline error: input buffer #" + std::to_string(i) + " of " + kernels[k]->getName() + ": no corresponding output buffer. ");
     40            const auto f = map.find(inputSets[i]);
     41            if (LLVM_UNLIKELY(f == map.end())) {
     42                report_fatal_error("Pipeline error: input buffer #" + std::to_string(i) + " of " + kernel->getName() + ": no corresponding output buffer. ");
    4243            }
    43             producerTable[k].push_back(f->second);
    4444            unsigned sourceKernel, outputIndex;
    4545            std::tie(sourceKernel, outputIndex) = f->second;
    46             if (sourceKernel >= k) {
    47                 llvm::report_fatal_error("Pipeline error: input buffer #" + std::to_string(i) + " of " + kernels[k]->getName() + ": not defined before use. ");
     46            producerTable[k].emplace_back(sourceKernel, outputIndex);
     47            if (LLVM_UNLIKELY(sourceKernel >= k)) {
     48                report_fatal_error("Pipeline error: input buffer #" + std::to_string(i) + " of " + kernel->getName() + ": not defined before use. ");
    4849            }
    49             //errs() << "sourceKernel: " + std::to_string(sourceKernel) + ", outputIndex: " + std::to_string(outputIndex) + ", user: " + std::to_string(k) + "\n";
    50             userTable[sourceKernel][outputIndex]= true;
    51            
    52         }
    53     }
    54     /*  TODO:  define sinks for  all outputs so that the following check succeeds on
    55      *  well-structured pipelines.
    56     for (unsigned k = 0; k < kernels.size(); k++) {
    57         auto outputSets = kernels[k]->getStreamSetOutputBuffers();
    58         //errs() << "kernel: " + kernels[k]->getName() + "\n";
    59         for (unsigned j = 0; j < outputSets.size(); j++) {
    60             if (userTable[k][j] == false) {
    61                 llvm::report_fatal_error("Pipeline error: output buffer #" + std::to_string(j) + " of " + kernels[k]->getName() + ": no users. ");
    62             }
    63         }
    64     }
    65     */
     50        }
     51    }
    6652    return producerTable;
    6753}
    6854
    69 using ConsumerTable = std::vector<std::vector<std::vector<unsigned>>>;
    70 
    7155ConsumerTable createConsumerTable(const std::vector<KernelBuilder *> & kernels) {
    72     ConsumerTable consumerTable(kernels.size());
    73    
    74     // First prepare a map from streamSet input buffers to their consuming kernel and input index.
    75     std::unordered_map<const StreamSetBuffer *, std::vector<unsigned>> bufferMap;
    76    
    77     for (unsigned k = 0; k < kernels.size(); k++) {
    78         auto inputSets = kernels[k]->getStreamSetInputBuffers();
    79         for (unsigned j = 0; j < inputSets.size(); j++) {
    80             auto f = bufferMap.find(inputSets[j]);
    81             std::vector<unsigned> kernelNo;
    82             kernelNo.push_back(k);
    83             if (f == bufferMap.end()) {
    84                 bufferMap.insert(std::make_pair(inputSets[j], kernelNo));
    85             }
    86             else{
     56    // map each input streamSet to its set of consuming kernels
     57    StreamSetBufferMap<std::vector<unsigned>> map;
     58    for (unsigned k = 0; k < kernels.size(); k++) {
     59        const auto & inputSets = kernels[k]->getStreamSetInputBuffers();
     60        for (const StreamSetBuffer * inputSet : inputSets) {
     61            auto f = map.find(inputSet);
     62            if (f == map.end()) {
     63                map.emplace(inputSet, std::vector<unsigned>({k}));
     64            } else {
    8765                f->second.push_back(k);
    8866            }
    8967        }
    9068    }
    91     for (unsigned k = 0; k < kernels.size(); k++) {
    92         auto outputSets = kernels[k]->getStreamSetOutputBuffers();
    93         for (unsigned i = 0; i < outputSets.size(); i++) {
    94             auto f = bufferMap.find(outputSets[i]);
    95             if (f == bufferMap.end()) {
    96                 llvm::report_fatal_error("Pipeline error: output buffer #" + std::to_string(i) + " of " + kernels[k]->getName() + ": not used by any kernel. ");
    97             }
    98             else {
    99                 consumerTable[k].push_back(f->second); 
     69    ConsumerTable consumerTable(kernels.size());
     70    for (unsigned k = 0; k < kernels.size(); k++) {
     71        const auto & outputSets = kernels[k]->getStreamSetOutputBuffers();
     72        for (const StreamSetBuffer * outputSet : outputSets) {
     73            auto f = map.find(outputSet);
     74            if (LLVM_LIKELY(f != map.end())) {
     75                consumerTable[k].emplace_back(std::move(f->second));
    10076            }         
    10177        }
     
    10480}
    10581
    106 Function * generateSegmentParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels, Type * sharedStructType, ProducerTable & producerTable, int id) {
     82Function * generateSegmentParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels, Type * sharedStructType, const ProducerTable & producerTable, int id) {
    10783   
    10884    // ProducerPos[k][i] will hold the producedItemCount of the i^th output stream
     
    169145        Value * ready = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
    170146
    171         if (kernels[k]->hasNoTerminateAttribute()) {
     147        KernelBuilder * const K = kernels[k];
     148
     149        if (K->hasNoTerminateAttribute()) {
    172150            iBuilder->CreateCondBr(ready, segmentLoopBody[k], segmentWait[k]);
    173151        } else { // If the kernel was terminated in a previous segment then the pipeline is done.
    174             BasicBlock * completionTest = BasicBlock::Create(iBuilder->getContext(), kernels[k]->getName() + "Completed", threadFunc, 0);
    175             BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), kernels[k]->getName() + "Exit", threadFunc, 0);
     152            BasicBlock * completionTest = BasicBlock::Create(iBuilder->getContext(), K->getName() + "Completed", threadFunc, 0);
     153            BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), K->getName() + "Exit", threadFunc, 0);
    176154            iBuilder->CreateCondBr(ready, completionTest, segmentWait[k]);
    177155            iBuilder->SetInsertPoint(completionTest);
    178             Value * alreadyDone = kernels[k]->getTerminationSignal(instancePtrs[k]);
     156            Value * alreadyDone = K->getTerminationSignal(instancePtrs[k]);
    179157            iBuilder->CreateCondBr(alreadyDone, exitBlock, segmentLoopBody[k]);
    180158            iBuilder->SetInsertPoint(exitBlock);
    181159            // Ensure that the next thread will also exit.
    182             kernels[k]->releaseLogicalSegmentNo(instancePtrs[k], nextSegNo);
     160            K->releaseLogicalSegmentNo(instancePtrs[k], nextSegNo);
    183161            iBuilder->CreateBr(exitThreadBlock);
    184162        }
    185163        iBuilder->SetInsertPoint(segmentLoopBody[k]);
    186164        std::vector<Value *> doSegmentArgs = {instancePtrs[k], doFinal};
    187         for (unsigned j = 0; j < kernels[k]->getStreamInputs().size(); j++) {
     165        for (unsigned j = 0; j < K->getStreamInputs().size(); j++) {
    188166            unsigned producerKernel, outputIndex;
    189167            std::tie(producerKernel, outputIndex) = producerTable[k][j];
    190168            doSegmentArgs.push_back(ProducerPos[producerKernel][outputIndex]);
    191169        }
    192         kernels[k]->createDoSegmentCall(doSegmentArgs);
    193          if (! (kernels[k]->hasNoTerminateAttribute())) {
    194             Value * terminated = kernels[k]->getTerminationSignal(instancePtrs[k]);
     170        K->createDoSegmentCall(doSegmentArgs);
     171         if (! (K->hasNoTerminateAttribute())) {
     172            Value * terminated = K->getTerminationSignal(instancePtrs[k]);
    195173            doFinal = iBuilder->CreateOr(doFinal, terminated);
    196174        }
    197175       std::vector<Value *> produced;
    198         for (unsigned i = 0; i < kernels[k]->getStreamOutputs().size(); i++) {
    199             produced.push_back(kernels[k]->getProducedItemCount(instancePtrs[k], kernels[k]->getStreamOutputs()[i].name, doFinal));
     176        for (unsigned i = 0; i < K->getStreamOutputs().size(); i++) {
     177            produced.push_back(K->getProducedItemCount(instancePtrs[k], K->getStreamOutputs()[i].name, doFinal));
    200178        }
    201179        ProducerPos.push_back(produced);
    202180
    203         kernels[k]->releaseLogicalSegmentNo(instancePtrs[k], nextSegNo);
     181        K->releaseLogicalSegmentNo(instancePtrs[k], nextSegNo);
    204182        if (k == last_kernel) {
    205183            segNo->addIncoming(iBuilder->CreateAdd(segNo, iBuilder->getSize(codegen::ThreadNum)), segmentLoopBody[last_kernel]);
     
    236214    for (auto k : kernels) k->createInstance();
    237215   
    238     ProducerTable producerTable = createProducerTable(kernels);
     216    const ProducerTable producerTable = createProducerTable(kernels);
    239217   
    240218    Type * const pthreadsTy = ArrayType::get(size_ty, codegen::ThreadNum);
     
    284262}
    285263
    286 Function * generateParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels, Type * sharedStructType, ProducerTable & producerTable, ConsumerTable & consumerTable, int id) {
     264Function * generateParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels, Type * sharedStructType, const ProducerTable & producerTable, const ConsumerTable & consumerTable, const unsigned id) {
    287265       
    288266    const auto ip = iBuilder->saveIP();
     
    302280    input->setName("input");
    303281
    304     KernelBuilder * targetK = kernels[id];
     282    KernelBuilder * const targetK = kernels[id];
    305283    Value * bufferSegments = ConstantInt::get(size_ty, codegen::BufferSegments - 1);
    306284    ConstantInt * segmentItems = iBuilder->getSize(codegen::SegmentSize * iBuilder->getBitBlockWidth());
     
    319297    std::vector<std::vector<Value *>> ProducerPos;
    320298    for (unsigned k = 0; k < kernels.size(); k++) {
     299        KernelBuilder * K = kernels[k];
     300
    321301        Value * ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(k)});
    322302        instancePtrs.push_back(iBuilder->CreateLoad(ptr));
    323303
    324304        std::vector<Value *> produced;
    325         for (unsigned i = 0; i < kernels[k]->getStreamOutputs().size(); i++) {
    326             produced.push_back(kernels[k]->getProducedItemCount(instancePtrs[k], kernels[k]->getStreamOutputs()[i].name));
     305        for (unsigned i = 0; i < K->getStreamOutputs().size(); i++) {
     306            produced.push_back(K->getProducedItemCount(instancePtrs[k], K->getStreamOutputs()[i].name));
    327307        }
    328308        ProducerPos.push_back(produced);
     
    336316    segNo->addIncoming(segNo, outputCheckBlock);
    337317
    338     waitCondTest = ConstantInt::get(int1ty, 1);
     318    waitCondTest = ConstantInt::getTrue(int1ty);
    339319    for (unsigned j = 0; j < targetK->getStreamOutputs().size(); j++) {
    340         std::vector<unsigned> consumerKernels = consumerTable[id][j];
     320        const auto & consumerKernels = consumerTable[id][j];
    341321        for (unsigned k = 0; k < consumerKernels.size(); k++) {
    342322            Value * consumerSegNo = kernels[consumerKernels[k]]->acquireLogicalSegmentNo(instancePtrs[consumerKernels[k]]);
     
    345325    }
    346326
    347     if(targetK->getStreamInputs().size() == 0) {
     327    if (targetK->getStreamInputs().empty()) {
    348328
    349329        iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, outputCheckBlock);
     
    360340        iBuilder->CreateCondBr(terminated, exitThreadBlock, outputCheckBlock);
    361341
    362     }
    363     else{
     342    } else {
    364343
    365344        BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
     
    369348        iBuilder->SetInsertPoint(inputCheckBlock);
    370349       
    371         waitCondTest = ConstantInt::get(int1ty, 1);
     350        waitCondTest = ConstantInt::getTrue(int1ty);
    372351        for (unsigned j = 0; j < targetK->getStreamInputs().size(); j++) {
    373352            unsigned producerKernel, outputIndex;
     
    395374            unsigned producerKernel, outputIndex;
    396375            std::tie(producerKernel, outputIndex) = producerTable[id][j];
    397             // doSegmentArgs.push_back(ProducerPos[producerKernel][outputIndex]);
    398376            doSegmentArgs.push_back(iBuilder->CreateMul(segmentItems, segNo));
    399377        }
     
    425403    PointerType * const int8PtrTy = iBuilder->getInt8PtrTy();
    426404   
    427     for (auto k : kernels) k->createInstance();
    428    
    429     ProducerTable producerTable = createProducerTable(kernels);
    430     ConsumerTable consumerTable = createConsumerTable(kernels);
     405    for (auto k : kernels) {
     406        k->createInstance();
     407    }
     408   
     409    const ProducerTable producerTable = createProducerTable(kernels);
     410    const ConsumerTable consumerTable = createConsumerTable(kernels);
    431411   
    432412    Type * const pthreadsTy = ArrayType::get(size_ty, threadNum);
     
    451431    }
    452432    for (unsigned i = 0; i < kernels.size(); i++) {
    453         kernels[i]->releaseLogicalSegmentNo(kernels[i]->getInstance(), iBuilder->getSize(0));
     433        KernelBuilder * const K = kernels[i];
     434        K->releaseLogicalSegmentNo(K->getInstance(), iBuilder->getSize(0));
    454435    }
    455436
     
    487468    BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exitBlock", main, 0);
    488469   
    489     ProducerTable producerTable = createProducerTable(kernels);
     470    const ProducerTable producer = createProducerTable(kernels);
     471
     472//    const ConsumerTable consumer = createConsumerTable(kernels);
    490473   
    491474    // ProducerPos[k][i] will hold the producedItemCount of the i^th output stream
     
    499482    iBuilder->SetInsertPoint(segmentLoop);
    500483
    501     Value * terminationFound = ConstantInt::getNullValue(iBuilder->getInt1Ty());
    502     for (unsigned k = 0; k < kernels.size(); k++) {
    503         Value * instance = kernels[k]->getInstance();
    504         std::vector<Value *> doSegmentArgs = {instance, terminationFound};
    505         for (unsigned j = 0; j < kernels[k]->getStreamInputs().size(); j++) {
     484    Value * terminated = ConstantInt::getFalse(iBuilder->getContext());
     485    for (unsigned k = 0; k < kernels.size(); k++) {
     486        KernelBuilder * const K = kernels[k];
     487        Value * const instance = K->getInstance();
     488        std::vector<Value *> args = {instance, terminated};
     489        for (unsigned j = 0; j < K->getStreamInputs().size(); j++) {
    506490            unsigned producerKernel, outputIndex;
    507             std::tie(producerKernel, outputIndex) = producerTable[k][j];
    508             doSegmentArgs.push_back(ProducerPos[producerKernel][outputIndex]);
    509         }
    510         kernels[k]->createDoSegmentCall(doSegmentArgs);
    511         if (! (kernels[k]->hasNoTerminateAttribute())) {
    512             Value * terminated = kernels[k]->getTerminationSignal(instance);
    513             terminationFound = iBuilder->CreateOr(terminationFound, terminated);
     491            std::tie(producerKernel, outputIndex) = producer[k][j];
     492            args.push_back(ProducerPos[producerKernel][outputIndex]);
     493        }
     494        K->createDoSegmentCall(args);
     495        if (!K->hasNoTerminateAttribute()) {
     496            terminated = iBuilder->CreateOr(terminated, K->getTerminationSignal(instance));
    514497        }
    515498        std::vector<Value *> produced;
    516         for (unsigned i = 0; i < kernels[k]->getStreamOutputs().size(); i++) {
    517             produced.push_back(kernels[k]->getProducedItemCount(instance, kernels[k]->getStreamOutputs()[i].name, terminationFound));
     499        const auto & streamOutputs = K->getStreamOutputs();
     500        for (unsigned i = 0; i < streamOutputs.size(); i++) {
     501            produced.push_back(K->getProducedItemCount(instance, streamOutputs[i].name, terminated));
    518502        }
    519503        ProducerPos.push_back(produced);
    520         Value * segNo = kernels[k]->acquireLogicalSegmentNo(instance);
    521         kernels[k]->releaseLogicalSegmentNo(instance, iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
    522     }
    523     iBuilder->CreateCondBr(terminationFound, exitBlock, segmentLoop);
     504        Value * segNo = K->acquireLogicalSegmentNo(instance);
     505        K->releaseLogicalSegmentNo(instance, iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
     506    }
     507
     508    iBuilder->CreateCondBr(terminated, exitBlock, segmentLoop);
    524509    iBuilder->SetInsertPoint(exitBlock);
    525510}
    526 
    527    
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5389 r5390  
    123123}
    124124
    125 void StreamSetBuffer::releaseBuffer(Value * /* self */) {
     125void StreamSetBuffer::releaseBuffer(Value * /* self */) const {
    126126    /* do nothing: memory is stack allocated */
    127127}
     
    200200}
    201201
    202 void ExtensibleBuffer::releaseBuffer(Value * self) {
     202void ExtensibleBuffer::releaseBuffer(Value * self) const {
    203203    Value * const sizePtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    204204    Value * size = iBuilder->CreateLoad(sizePtr);
     
    446446}
    447447
    448 void ExpandableBuffer::releaseBuffer(Value * self) {
     448void ExpandableBuffer::releaseBuffer(Value * self) const {
    449449    iBuilder->CreateAlignedFree(getBaseAddress(self));
    450450}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5389 r5390  
    4747    virtual void allocateBuffer();
    4848
    49     virtual void releaseBuffer(llvm::Value * self);
     49    virtual void releaseBuffer(llvm::Value * self) const;
    5050
    5151    virtual llvm::Value * getStreamBlockPtr(llvm::Value * self, llvm::Value * streamIndex, llvm::Value * blockIndex, const bool readOnly) const;
     
    133133    void reserveBytes(llvm::Value * self, llvm::Value * required) const override;
    134134
    135     void releaseBuffer(llvm::Value * self) override;
     135    void releaseBuffer(llvm::Value * self) const override;
    136136
    137137protected:
     
    218218    llvm::Value * getStreamSetCount(llvm::Value * self) const override;
    219219
    220     void releaseBuffer(llvm::Value * self) override;
     220    void releaseBuffer(llvm::Value * self) const override;
    221221
    222222protected:
Note: See TracChangeset for help on using the changeset viewer.