Changeset 5408 for icGREP/icgrep-devel


Ignore:
Timestamp: Apr 15, 2017, 4:42:33 PM (2 years ago)
Author: nmedfort
Message: First attempt to allow Kernels to wait for consumers to finish processing before performing a realloc.

Location: icGREP/icgrep-devel/icgrep
Files: 14 edited

Legend: Unmodified, Added, Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5406 r5408  
    690690        pxDriver.addKernelCall(matchCountK, {&mergedResults}, {});
    691691        pxDriver.generatePipelineIR();
    692         iBuilder->CreateRet(matchCountK.getScalarField(matchCountK.getInstance(), "matchedLineCount"));
     692        iBuilder->CreateRet(matchCountK.getScalarField("matchedLineCount"));
    693693        pxDriver.linkAndFinalize();
    694694    } else {
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5402 r5408  
    4242kernels/evenodd.cpp
    4343kernels/evenodd.h
     44kernels/grep_kernel.cpp
     45kernels/grep_kernel.h
    4446kernels/interface.cpp
    4547kernels/interface.h
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5404 r5408  
    1515using namespace llvm;
    1616
    17 static std::string sha1sum(const std::string & str) {
     17inline static std::string sha1sum(const std::string & str) {
    1818    char buffer[41];    // 40 hex-digits and the terminating null
    19     unsigned int digest[5];     // 160 bits in total
    20    
     19    uint32_t digest[5]; // 160 bits in total
    2120    boost::uuids::detail::sha1 sha1;
    2221    sha1.process_bytes(str.c_str(), str.size());
     
    2726}
    2827
    29 ICgrepKernelBuilder::ICgrepKernelBuilder (IDISA::IDISA_Builder * iBuilder, RE * re_ast, bool CountOnly)
     28inline std::string makeSignature(RE * const re_ast, const bool CountOnly) {
     29    std::string signature = Printer_RE::PrintRE(re_ast);
     30    if (CountOnly) {
     31        signature += "-c";
     32    }
     33    return signature;
     34}
     35
     36ICgrepKernelBuilder::ICgrepKernelBuilder (IDISA::IDISA_Builder * const iBuilder, RE * const re_ast, const bool CountOnly)
    3037: PabloKernel(iBuilder, "",
    3138              {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}},
    3239              CountOnly ? std::vector<Binding>{} : std::vector<Binding>{Binding{iBuilder->getStreamSetTy(1, 1), "matches"}},
    3340              {},
    34               CountOnly ? std::vector<Binding>{Binding{iBuilder->getSizeTy(), "matchedLineCount"}} : std::vector<Binding>{}),
    35   mCountOnly(CountOnly),
    36   mRE(re_ast) {
    37    
    38     mSignature = Printer_RE::PrintRE(re_ast);
    39     if (CountOnly) {
    40         mSignature += "-c";
    41     }
    42     std::string uniqueID = sha1sum(mSignature);
    43     setName("Parabix:" + uniqueID);
     41              CountOnly ? std::vector<Binding>{Binding{iBuilder->getSizeTy(), "matchedLineCount"}} : std::vector<Binding>{})
     42, mCountOnly(CountOnly)
     43, mRE(re_ast)
     44, mSignature(makeSignature(re_ast, CountOnly)) {
     45    setName("Parabix:" + sha1sum(mSignature));
    4446}
    4547
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h

    r5404 r5408  
    77
    88#include <pablo/pablo_kernel.h>  // for PabloKernel
    9 #include "kernel.h"              // for KernelBuilder
    10 #include <re/re_re.h>
    11 #include <vector>                // for vector
    12 #include <string>                // for string
     9
    1310namespace IDISA { class IDISA_Builder; }
    14 
     11namespace re { class RE; }
    1512namespace kernel {
    1613
    1714class ICgrepKernelBuilder: public pablo::PabloKernel {
    1815public:
    19     ICgrepKernelBuilder(IDISA::IDISA_Builder * iBuilder, re::RE * re_ast, bool CountOnly = false);
     16    ICgrepKernelBuilder(IDISA::IDISA_Builder * const iBuilder, re::RE * const re_ast, const bool CountOnly = false);
    2017   
    2118    std::string generateKernelSignature(std::string moduleId) override;
     
    2421
    2522private:
    26     bool mCountOnly;
    27     re::RE * mRE;
    28     std::string mSignature;
     23    const bool      mCountOnly;
     24    re::RE * const  mRE;
     25    std::string     mSignature;
    2926};
    3027
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5402 r5408  
    7070}
    7171
    72 void KernelInterface::addKernelDeclarations(Module * client) {
     72void KernelInterface::addKernelDeclarations(Module * const client) {
    7373    Module * saveModule = iBuilder->getModule();
    7474    auto savePoint = iBuilder->saveIP();
     
    7777        throw std::runtime_error("Kernel interface " + getName() + " not yet finalized.");
    7878    }
    79     PointerType * selfType = PointerType::getUnqual(mKernelStateType);
     79    PointerType * const selfType = mKernelStateType->getPointerTo();
     80    IntegerType * const sizeTy = iBuilder->getSizeTy();
     81    PointerType * const consumerTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
    8082
    8183    // Create the initialization function prototype
    82     std::vector<Type *> initParameters = {selfType};
     84    std::vector<Type *> initParameters = {selfType};   
    8385    for (auto binding : mScalarInputs) {
    8486        initParameters.push_back(binding.type);
    8587    }
     88    initParameters.insert(initParameters.end(), mStreamSetOutputs.size(), consumerTy);
     89
    8690    FunctionType * initType = FunctionType::get(iBuilder->getVoidTy(), initParameters, false);
    8791    Function * init = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, client);
     
    9397        (++args)->setName(binding.name);
    9498    }
     99    for (auto binding : mStreamSetOutputs) {
     100        args->setName(binding.name + "ConsumerLogicalSegments");       
     101//        args->addAttr(Attribute::NoCapture);
     102//        args->addAttr(Attribute::ReadOnly);
     103        ++args;
     104    }
    95105
    96     /// INVESTIGATE: should we explicitly mark whether to track a kernel output's consumed amount? It would have
    97     /// to be done at the binding level using the current architecture. It would reduce the number of arguments
    98     /// passed between kernels.
    99 
    100     // Create the doSegment function prototype.   
    101     IntegerType * const sizeTy = iBuilder->getSizeTy();
    102 
     106    // Create the doSegment function prototype.
    103107    std::vector<Type *> params = {selfType, iBuilder->getInt1Ty()};
    104     params.insert(params.end(), mStreamSetInputs.size() + mStreamSetOutputs.size(), sizeTy);
     108    params.insert(params.end(), mStreamSetInputs.size(), sizeTy);
    105109
    106110    FunctionType * const doSegmentType = FunctionType::get(iBuilder->getVoidTy(), params, false);
     
    113117    (++args)->setName("doFinal");
    114118    for (const Binding & input : mStreamSetInputs) {
    115         (++args)->setName(input.name + "_availableItems");
    116     }
    117     for (const Binding & output : mStreamSetOutputs) {
    118         (++args)->setName(output.name + "_consumedItems");
     119        (++args)->setName(input.name + "AvailableItems");
    119120    }
    120121
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5402 r5408  
    7575
    7676class KernelInterface {
    77 
    7877public:
    7978    /*
     
    101100    void addKernelDeclarations(llvm::Module * client);
    102101
    103     virtual void createInstance() = 0;
     102    virtual llvm::Value * createInstance() = 0;
     103
     104    virtual void initializeInstance() = 0;
    104105
    105106    void setInitialArguments(std::vector<llvm::Value *> args);
    106107
    107     llvm::Value * getInstance() const { return mKernelInstance; }
     108    llvm::Value * getInstance() const {
     109        return mKernelInstance;
     110    }
    108111
    109112    unsigned getLookAhead() const {
     
    115118    }
    116119
    117     virtual llvm::Value * getProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * doFinal = nullptr) const = 0;
     120    virtual llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const = 0;
    118121
    119     virtual void setProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const = 0;
     122    virtual void setProducedItemCount(const std::string & name, llvm::Value * value) const = 0;
    120123
    121     virtual llvm::Value * getProcessedItemCount(llvm::Value * instance, const std::string & name) const = 0;
     124    virtual llvm::Value * getProcessedItemCount(const std::string & name) const = 0;
    122125
    123     virtual void setProcessedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const = 0;
     126    virtual void setProcessedItemCount(const std::string & name, llvm::Value * value) const = 0;
    124127
    125     virtual llvm::Value * getTerminationSignal(llvm::Value * instance) const = 0;
     128    virtual llvm::Value * getTerminationSignal() const = 0;
    126129
    127     virtual void setTerminationSignal(llvm::Value * instance) const = 0;
     130    virtual void setTerminationSignal() const = 0;
    128131   
    129132    void setLookAhead(unsigned lookAheadPositions) {
     
    147150                    std::vector<Binding> && internal_scalars)
    148151    : iBuilder(builder)
     152    , mKernelInstance(nullptr)
     153    , mKernelStateType(nullptr)
     154    , mLookAheadPositions(0)
    149155    , mKernelName(kernelName)
    150156    , mStreamSetInputs(stream_inputs)
     
    153159    , mScalarOutputs(scalar_outputs)
    154160    , mInternalScalars(internal_scalars)
    155     , mKernelStateType(nullptr)
    156     , mKernelInstance(nullptr)
    157     , mLookAheadPositions(0) {
     161    {
    158162
    159163    }
    160164   
     165    void setInstance(llvm::Value * const instance) {
     166        assert ("kernel instance cannot be null!" && instance);
     167        assert ("kernel instance must point to a valid kernel state type!" && (instance->getType()->getPointerElementType() == mKernelStateType));
     168        mKernelInstance = instance;
     169    }
     170
    161171protected:
    162172   
    163     IDISA::IDISA_Builder * const iBuilder;
    164     std::string mKernelName;
    165     std::vector<llvm::Value *> mInitialArguments;
    166     std::vector<Binding> mStreamSetInputs;
    167     std::vector<Binding> mStreamSetOutputs;
    168     std::vector<Binding> mScalarInputs;
    169     std::vector<Binding> mScalarOutputs;
    170     std::vector<Binding> mInternalScalars;
    171     llvm::StructType * mKernelStateType;
    172     llvm::Value * mKernelInstance;
    173     unsigned mLookAheadPositions;
    174    
     173    IDISA::IDISA_Builder * const    iBuilder;
     174    llvm::Value *                   mKernelInstance;
     175    llvm::StructType *              mKernelStateType;
     176    unsigned                        mLookAheadPositions;
     177    std::string                     mKernelName;
     178    std::vector<llvm::Value *>      mInitialArguments;
     179    std::vector<Binding>            mStreamSetInputs;
     180    std::vector<Binding>            mStreamSetOutputs;
     181    std::vector<Binding>            mScalarInputs;
     182    std::vector<Binding>            mScalarOutputs;
     183    std::vector<Binding>            mInternalScalars;
    175184};
    176185
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5403 r5408  
    1515#include <llvm/Bitcode/ReaderWriter.h>
    1616#include <llvm/Transforms/Utils/Local.h>
    17 
    18 static const auto DO_BLOCK_SUFFIX = "_DoBlock";
    19 
    20 static const auto FINAL_BLOCK_SUFFIX = "_FinalBlock";
    21 
    22 static const auto LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";
    23 
    24 static const auto PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";
    25 
    26 static const auto CONSUMED_ITEM_COUNT_SUFFIX = "_consumedItemCount";
    27 
    28 static const auto PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";
    29 
    30 static const auto TERMINATION_SIGNAL = "terminationSignal";
    31 
    32 static const auto BUFFER_PTR_SUFFIX = "_bufferPtr";
    33 
    34 static const auto BLOCK_MASK_SUFFIX = "_blkMask";
     17#include <kernels/streamset.h>
     18#include <sstream>
     19
     20static const std::string DO_BLOCK_SUFFIX = "_DoBlock";
     21
     22static const std::string FINAL_BLOCK_SUFFIX = "_FinalBlock";
     23
     24static const std::string LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";
     25
     26static const std::string PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";
     27
     28static const std::string CONSUMED_ITEM_COUNT_SUFFIX = "_consumedItemCount";
     29
     30static const std::string PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";
     31
     32static const std::string TERMINATION_SIGNAL = "terminationSignal";
     33
     34static const std::string BUFFER_PTR_SUFFIX = "_bufferPtr";
     35
     36static const std::string CONSUMER_LOGICAL_SEGMENT_SUFFIX = "_cls";
    3537
    3638using namespace llvm;
     
    7072   
    7173void KernelBuilder::prepareKernel() {
     74
    7275    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    7376        report_fatal_error("Cannot prepare kernel after kernel state finalized");
    7477    }
     78
    7579    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
    7680        std::string tmp;
     
    97101        }       
    98102    }
     103
     104    IntegerType * const sizeTy = iBuilder->getSizeTy();
    99105    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    100106        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
    101107        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
    102             addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
     108            addScalar(sizeTy, mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
    103109        }
    104110    }
     
    115121        addScalar(binding.type, binding.name);
    116122    }
    117     addScalar(iBuilder->getSizeTy(), LOGICAL_SEGMENT_NO_SCALAR);
     123
     124    Type * const consumerSetTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
     125    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     126        addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_LOGICAL_SEGMENT_SUFFIX);
     127    }
     128
     129    addScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
    118130    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
     131
    119132    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
    120133}
    121134
    122135Module * KernelBuilder::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    123     setCallParameters(inputs, outputs);
    124     std::string cacheName = getName() + "_" + iBuilder->getBuilderUniqueName();
    125     for (auto & b: mStreamSetInputBuffers) {
    126         cacheName += ":" + b->getUniqueID();
    127     }
    128     for (auto & b: mStreamSetOutputBuffers) {
    129         cacheName += ":" + b->getUniqueID();
    130     }
     136
     137    assert (mStreamSetInputBuffers.empty());
     138    assert (mStreamSetOutputBuffers.empty());
     139
     140    if (LLVM_UNLIKELY(mStreamSetInputs.size() != inputs.size())) {
     141        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
     142                           " input stream sets but was given "
     143                           + std::to_string(mStreamSetInputBuffers.size()));
     144    }
     145
     146    for (unsigned i = 0; i < inputs.size(); ++i) {
     147        StreamSetBuffer * const buf = inputs[i];
     148        if (LLVM_UNLIKELY(buf == nullptr)) {
     149            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
     150                               + " cannot be null");
     151        }
     152        buf->addConsumer(this);
     153    }
     154
     155    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != outputs.size())) {
     156        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
     157                           + " output stream sets but was given "
     158                           + std::to_string(mStreamSetOutputBuffers.size()));
     159    }
     160
     161    for (unsigned i = 0; i < outputs.size(); ++i) {
     162        StreamSetBuffer * const buf = outputs[i];
     163        if (LLVM_UNLIKELY(buf == nullptr)) {
     164            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
     165                               + " cannot be null");
     166        }
     167        if (LLVM_LIKELY(buf->getProducer() == nullptr)) {
     168            buf->setProducer(this);
     169        } else {
     170            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
     171                               + " is already produced by kernel " + buf->getProducer()->getName());
     172        }
     173    }
     174
     175    std::stringstream cacheName;
     176
     177    cacheName << getName() << '_' << iBuilder->getBuilderUniqueName();
     178    for (const StreamSetBuffer * b: inputs) {
     179        cacheName <<  ':' <<  b->getUniqueID();
     180    }
     181    for (const StreamSetBuffer * b: outputs) {
     182        cacheName <<  ':' <<  b->getUniqueID();
     183    }
     184
     185    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
     186    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
     187
    131188    prepareKernel();
    132     return new Module(cacheName, iBuilder->getContext());
    133 }
    134 
    135 void KernelBuilder::setCallParameters(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    136     assert (mStreamSetInputBuffers.empty());
    137     mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
    138     for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
    139         if (LLVM_UNLIKELY(mStreamSetInputBuffers[i] == nullptr)) {
    140             report_fatal_error(getName() + ": input stream set " + std::to_string(i)
    141                                + " cannot be null when calling generateKernel()");
    142         }
    143     }
    144     if (LLVM_UNLIKELY(mStreamSetInputs.size() != mStreamSetInputBuffers.size())) {
    145         report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
    146                            " input stream sets but generateKernel() was given "
    147                            + std::to_string(mStreamSetInputBuffers.size()));
    148     }
    149     assert (mStreamSetOutputBuffers.empty());
    150     mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
    151     for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
    152         if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i] == nullptr)) {
    153             report_fatal_error(getName() + ": output stream set " + std::to_string(i)
    154                                + " cannot be null when calling generateKernel()");
    155         }
    156     }
    157     if (LLVM_UNLIKELY(mStreamSetOutputs.size() != mStreamSetOutputBuffers.size())) {
    158         report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
    159                            + " output stream sets but generateKernel() was given "
    160                            + std::to_string(mStreamSetOutputBuffers.size()));
    161     }       
    162 }   
     189    return new Module(cacheName.str(), iBuilder->getContext());
     190}
    163191
    164192// Default kernel signature: generate the IR and emit as byte code.
     
    193221        }
    194222        iBuilder->restoreIP(savePoint);
    195         mIsGenerated = true;
     223        mIsGenerated = true;       
    196224    }
    197225}
     
    201229    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
    202230    auto args = mCurrentMethod->arg_begin();
    203     mSelf = &*(args++);
     231    setInstance(&*(args++));
    204232    Value * doFinal = &*(args++);
    205233    std::vector<Value *> producerPos;
     
    215243    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
    216244    Function::arg_iterator args = mCurrentMethod->arg_begin();
    217     mSelf = &*(args++);
    218     iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), mSelf);
     245    setInstance(&*(args++));
     246    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), getInstance());
    219247    for (auto binding : mScalarInputs) {
    220         Value * param = &*(args++);
    221         Value * ptr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
    222         iBuilder->CreateStore(param, ptr);
     248        setScalarField(binding.name, &*(args++));
     249    }
     250    for (auto binding : mStreamSetOutputs) {
     251        setConsumerState(binding.name, &*(args++));
    223252    }
    224253    generateInitMethod();
     
    234263}
    235264
    236 unsigned KernelBuilder::getScalarCount() const {
    237     return mKernelFields.size();
    238 }
    239 
    240 Value * KernelBuilder::getScalarFieldPtr(Value * instance, Value * index) const {
    241     assert ("instance cannot be null!" && instance);
    242     return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), index});
    243 }
    244 
    245 Value * KernelBuilder::getScalarFieldPtr(Value * instance, const std::string & fieldName) const {
    246     assert ("instance cannot be null!" && instance);
    247     return getScalarFieldPtr(instance, getScalarIndex(fieldName));
    248 }
    249 
    250 Value * KernelBuilder::getScalarField(Value * instance, const std::string & fieldName) const {
    251     return iBuilder->CreateLoad(getScalarFieldPtr(instance, fieldName));
    252 }
    253 
    254 Value * KernelBuilder::getScalarField(Value * instance, Value * index) const {
    255     assert ("instance cannot be null!" && instance);
    256     return iBuilder->CreateLoad(getScalarFieldPtr(instance, index));
    257 }
    258 
    259 void KernelBuilder::setScalarField(Value * instance, const std::string & fieldName, Value * value) const {
    260     assert ("instance cannot be null!" && instance);
    261     iBuilder->CreateStore(value, getScalarFieldPtr(instance, fieldName));
    262 }
    263 
    264 void KernelBuilder::setScalarField(Value * instance, Value * index, Value * value) const {
    265     assert ("instance cannot be null!" && instance);
    266     iBuilder->CreateStore(value, getScalarFieldPtr(instance, index));
    267 }
    268 
    269 Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name, Value * doFinal) const {
    270     assert ("instance cannot be null!" && instance);
     265Value * KernelBuilder::getProducedItemCount(const std::string & name, Value * doFinal) const {
    271266    Port port; unsigned ssIdx;
    272267    std::tie(port, ssIdx) = getStreamPort(name);
     
    290285            }
    291286        }
    292         Value * principalItemsProcessed = getScalarField(instance, principalField);
     287        Value * principalItemsProcessed = getScalarField(principalField);
    293288        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
    294289    }
    295     return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
     290    return getScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX);
    296291}
    297292
     
    309304}
    310305
    311 Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
    312     assert ("instance cannot be null!" && instance);
     306Value * KernelBuilder::getProcessedItemCount(const std::string & name) const {
    313307    Port port; unsigned ssIdx;
    314308    std::tie(port, ssIdx) = getStreamPort(name);
     
    319313            refSet = mStreamSetInputs[0].name;
    320314        }
    321         Value * principalItemsProcessed = getScalarField(instance, refSet + PROCESSED_ITEM_COUNT_SUFFIX);
     315        Value * principalItemsProcessed = getScalarField(refSet + PROCESSED_ITEM_COUNT_SUFFIX);
    322316        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
    323317    }
    324     return getScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX);
    325 }
    326 
    327 llvm::Value * KernelBuilder::getConsumedItemCount(const std::string & name) const {
    328     auto arg = mCurrentMethod->arg_begin();
    329     ++arg; // self
    330     ++arg; // doFinal
    331     for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    332         ++arg; // input
    333     }
    334     for (unsigned i = 0; i < mStreamSetOutputs.size(); ++i) {
    335         if (mStreamSetOutputs[i].name == name) {
    336             return &*arg;
    337         }
    338         ++arg;
    339     }
    340     return nullptr;
    341 }
    342 
    343 void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
    344     assert ("instance cannot be null!" && instance);
    345     setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
    346 }
    347 
    348 void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
    349     assert ("instance cannot be null!" && instance);
    350     setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
    351 }
    352 
    353 Value * KernelBuilder::getTerminationSignal(Value * instance) const {
    354     assert ("instance cannot be null!" && instance);
    355     return getScalarField(instance, TERMINATION_SIGNAL);
    356 }
    357 
    358 void KernelBuilder::setTerminationSignal(Value * instance) const {
    359     assert ("instance cannot be null!" && instance);
    360     setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getTrue());
    361 }
    362 
    363 LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * instance) const {
    364     assert ("instance cannot be null!" && instance);
    365     return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
    366 }
    367 
    368 void KernelBuilder::releaseLogicalSegmentNo(Value * instance, Value * newCount) const {
    369     assert ("instance cannot be null!" && instance);
    370     iBuilder->CreateAtomicStoreRelease(newCount, getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
     318    return getScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX);
     319}
     320
     321void KernelBuilder::setProducedItemCount(const std::string & name, Value * value) const {
     322    setScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX, value);
     323}
     324
     325void KernelBuilder::setProcessedItemCount(const std::string & name, Value * value) const {
     326    setScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX, value);
     327}
     328
     329Value * KernelBuilder::getTerminationSignal() const {
     330    return getScalarField(TERMINATION_SIGNAL);
     331}
     332
     333void KernelBuilder::setTerminationSignal() const {
     334    setScalarField(TERMINATION_SIGNAL, iBuilder->getTrue());
     335}
     336
     337LoadInst * KernelBuilder::acquireLogicalSegmentNo() const {
     338    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(getInstance(), LOGICAL_SEGMENT_NO_SCALAR));
     339}
     340
     341void KernelBuilder::releaseLogicalSegmentNo(Value * nextSegNo) const {
     342    iBuilder->CreateAtomicStoreRelease(nextSegNo, getScalarFieldPtr(getInstance(), LOGICAL_SEGMENT_NO_SCALAR));
     343}
     344
     345llvm::Value * KernelBuilder::getConsumerState(const std::string & name) const {
     346    return getScalarField(name + CONSUMER_LOGICAL_SEGMENT_SUFFIX);
     347}
     348
     349void KernelBuilder::setConsumerState(const std::string & name, llvm::Value * value) const {
     350    setScalarField(name + CONSUMER_LOGICAL_SEGMENT_SUFFIX, value);
    371351}
    372352
     
    505485
    506486Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
    507     return getScalarField(getSelf(), name + BUFFER_PTR_SUFFIX);
     487    return getScalarField(name + BUFFER_PTR_SUFFIX);
    508488}
    509489
     
    518498
    519499CallInst * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
     500    assert (getDoSegmentFunction()->getArgumentList().size() == args.size());
    520501    return iBuilder->CreateCall(getDoSegmentFunction(), args);
    521502}
     
    529510}
    530511
    531 void KernelBuilder::createInstance() {
     512Value * KernelBuilder::createInstance() {
    532513    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    533514        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
    534515    }
    535     mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
    536 
     516    setInstance(iBuilder->CreateCacheAlignedAlloca(mKernelStateType));
     517    return getInstance();
     518}
     519
     520void KernelBuilder::initializeInstance() {
     521    if (LLVM_UNLIKELY(getInstance() == nullptr)) {
     522        report_fatal_error("Cannot initialize " + getName() + " before calling createInstance()");
     523    }
    537524    std::vector<Value *> args;
    538     args.reserve(mInitialArguments.size() + mStreamSetInputBuffers.size() + mStreamSetOutputBuffers.size() + 1);
    539     args.push_back(mKernelInstance);
     525    args.reserve(1 + mInitialArguments.size() + mStreamSetInputBuffers.size() + (mStreamSetOutputBuffers.size() * 2));
     526    args.push_back(getInstance());
    540527    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
    541528        Value * arg = mInitialArguments[i];
     
    566553    }
    567554    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
     555
     556    IntegerType * const sizeTy = iBuilder->getSizeTy();
     557    PointerType * const sizePtrTy = sizeTy->getPointerTo();
     558    PointerType * const sizePtrPtrTy = sizePtrTy->getPointerTo();
     559    StructType * const consumerTy = StructType::get(sizeTy, sizePtrPtrTy, nullptr);
     560    Constant * const sizeOfSizePtrTy = ConstantExpr::getSizeOf(sizePtrTy);
     561
     562    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
     563        const auto & consumers = mStreamSetOutputBuffers[i]->getConsumers();
     564        AllocaInst * const outputConsumers = iBuilder->CreateAlloca(consumerTy);
     565        Value * const numPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     566        const auto n = consumers.size();
     567        const auto consumerCount = iBuilder->getSize(n);
     568        iBuilder->CreateStore(consumerCount, numPtr);
     569        Value * const consumerPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     570        Value * const segNoPtrs = iBuilder->CreatePointerCast(iBuilder->CreateMalloc(ConstantExpr::getMul(consumerCount, sizeOfSizePtrTy)), sizePtrPtrTy);
     571        iBuilder->CreateStore(segNoPtrs, consumerPtr);
     572        for (unsigned i = 0; i < n; ++i) {
     573            KernelBuilder * const consumer = consumers[i];
     574            assert (consumer->getInstance());
     575            iBuilder->CreateStore(consumer->getScalarFieldPtr(consumer->getInstance(), LOGICAL_SEGMENT_NO_SCALAR), iBuilder->CreateGEP(segNoPtrs, iBuilder->getSize(i)));
     576        }
     577        args.push_back(outputConsumers);
     578    }
    568579    iBuilder->CreateCall(getInitFunction(), args);
    569580}
     
    602613    }
    603614
    604     PHINode * stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
     615    PHINode * const stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
    605616    stridesRemaining->addIncoming(stridesToDo, entryBlock);
    606617    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
     
    679690inline void BlockOrientedKernel::writeDoBlockMethod() {
    680691
    681     Value * const self = mSelf;
     692    Value * const self = getInstance();
    682693    Function * const cp = mCurrentMethod;
    683694    auto ip = iBuilder->saveIP();
     
    685696    /// Check if the do block method is called and create the function if necessary   
    686697    if (!useIndirectBr()) {
    687         FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
     698        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType()}, false);
    688699        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
    689700        mCurrentMethod->setCallingConv(CallingConv::C);
     
    692703        auto args = mCurrentMethod->arg_begin();
    693704        mCurrentMethod = mCurrentMethod;
    694         mSelf = &*args;
    695         mSelf->setName("self");
     705        args->setName("self");
     706        setInstance(&*args);
    696707        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
    697708    }
     
    750761        iBuilder->restoreIP(ip);
    751762        iBuilder->CreateCall(mCurrentMethod, self);
    752         mSelf = self;
     763        setInstance(self);
    753764        mCurrentMethod = cp;
    754765    }
     
    758769inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
    759770
    760     Value * const self = mSelf;
     771    Value * const self = getInstance();
    761772    Function * const cp = mCurrentMethod;
    762773    Value * const remainingItemCount = remainingItems;
     
    764775
    765776    if (!useIndirectBr()) {
    766         FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
     777        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType(), iBuilder->getSizeTy()}, false);
    767778        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
    768779        mCurrentMethod->setCallingConv(CallingConv::C);
     
    770781        mCurrentMethod->setDoesNotCapture(1);
    771782        auto args = mCurrentMethod->arg_begin();
    772         mSelf = &*args;
    773         mSelf->setName("self");
     783        args->setName("self");
     784        setInstance(&*args);
    774785        remainingItems = &*(++args);
    775786        remainingItems->setName("remainingItems");
     
    786797        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
    787798        mCurrentMethod = cp;
    788         mSelf = self;
     799        setInstance(self);
    789800    }
    790801
     
    805816        iBuilder->SetInsertPoint(bb);
    806817    } else {
    807         iBuilder->CreateCall(mDoBlockMethod, mSelf);
    808     }
    809 }
    810 
    811 // CONSTRUCTOR
    812 
    813 BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
    814                                                            std::string && kernelName,
    815                                                            std::vector<Binding> && stream_inputs,
    816                                                            std::vector<Binding> && stream_outputs,
    817                                                            std::vector<Binding> && scalar_parameters,
    818                                                            std::vector<Binding> && scalar_outputs,
    819                                                            std::vector<Binding> && internal_scalars)
    820 : KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
    821 , mDoBlockMethod(nullptr)
    822 , mStrideLoopBody(nullptr)
    823 , mStrideLoopBranch(nullptr)
    824 , mStrideLoopTarget(nullptr) {
    825 
     818        iBuilder->CreateCall(mDoBlockMethod, getInstance());
     819    }
    826820}
    827821
     
    836830                             std::vector<Binding> && internal_scalars)
    837831: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
    838 , mSelf(nullptr)
    839832, mCurrentMethod(nullptr)
    840 , mNoTerminateAttribute(false) 
     833, mNoTerminateAttribute(false)
    841834, mIsGenerated(false) {
    842835
    843836}
    844837
    845 KernelBuilder::~KernelBuilder() { }
     838KernelBuilder::~KernelBuilder() {
        // Intentionally empty. The destructor is declared pure virtual in kernel.h
        // ("virtual ~KernelBuilder() = 0;"), so this out-of-line definition must exist
        // for derived-class destructors to link against.
     839
     840}
     841
     842// CONSTRUCTOR
     843BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
     844                                         std::string && kernelName,
     845                                         std::vector<Binding> && stream_inputs,
     846                                         std::vector<Binding> && stream_outputs,
     847                                         std::vector<Binding> && scalar_parameters,
     848                                         std::vector<Binding> && scalar_outputs,
     849                                         std::vector<Binding> && internal_scalars)
     850: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
     851, mDoBlockMethod(nullptr)
     852, mStrideLoopBody(nullptr)
     853, mStrideLoopBranch(nullptr)
     854, mStrideLoopTarget(nullptr) {
        // All bindings are moved straight through to the KernelBuilder base; the do-block
        // method and the three stride-loop members start out null. Nothing else to do here.
     855
     856}
    846857
    847858// CONSTRUCTOR
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5403 r5408  
    1212#include <boost/container/flat_map.hpp>
    1313#include <IR_Gen/idisa_builder.h>
     14#include <kernels/pipeline.h>
    1415
    1516//namespace llvm { class ConstantInt; }
     
    3132    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
    3233    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
     34
     35    friend void ::generateSegmentParallelPipeline(IDISA::IDISA_Builder *, const std::vector<KernelBuilder *> &);
     36    friend void ::generatePipelineLoop(IDISA::IDISA_Builder *, const std::vector<KernelBuilder *> &);
     37    friend void ::generateParallelPipeline(IDISA::IDISA_Builder *, const std::vector<KernelBuilder *> &);
    3338public:
    3439   
     
    6873    llvm::Module * createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
    6974     
    70     void setCallParameters(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
    71 
    7275    // Generate the Kernel to the current module (iBuilder->getModule()).
    7376    void generateKernel();
    7477   
    75     void createInstance() override;
    76 
    77     llvm::Value * getProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * doFinal = nullptr) const final;
    78 
    79     void setProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const final;
    80 
    81     llvm::Value * getProcessedItemCount(llvm::Value * instance, const std::string & name) const final;
    82 
    83     void setProcessedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const final;
    84 
    85     bool hasNoTerminateAttribute() { return mNoTerminateAttribute;}
    86    
    87     llvm::Value * getTerminationSignal(llvm::Value * instance) const final;
    88 
    89     void setTerminationSignal(llvm::Value * instance) const final;
    90 
    91     // Get the value of a scalar field for a given instance.
    92     llvm::Value * getScalarField(llvm::Value * instance, const std::string & fieldName) const;
    93 
    94     llvm::Value * getScalarField(llvm::Value * instance, llvm::Value * index) const;
    95 
    96     // Set the value of a scalar field for a given instance.
    97     void setScalarField(llvm::Value *instance, const std::string & fieldName, llvm::Value * value) const;
    98 
    99     void setScalarField(llvm::Value * instance, llvm::Value * index, llvm::Value * value) const;
     78    llvm::Value * createInstance() final;
     79
     80    void initializeInstance() final;
     81
     82    llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const final;
     83
     84    void setProducedItemCount(const std::string & name, llvm::Value * value) const final;
     85
     86    llvm::Value * getProcessedItemCount(const std::string & name) const final;
     87
     88    void setProcessedItemCount(const std::string & name, llvm::Value * value) const final;
     89
     90    bool hasNoTerminateAttribute() const {
               // Read-only accessor for the mNoTerminateAttribute flag.
     91        return mNoTerminateAttribute;
     92    }
     93   
     94    llvm::Value * getTerminationSignal() const final;
     95
     96    void setTerminationSignal() const final;
     97
     98    // Get the value of a scalar field for the current instance.
     99    llvm::Value * getScalarFieldPtr(llvm::Value * index) const {
               // Convenience overload: addresses field `index` of the current instance
               // by forwarding getInstance() to the (instance, index) overload.
     100        return getScalarFieldPtr(getInstance(), index);
     101    }
     102
     103    llvm::Value * getScalarFieldPtr(const std::string & fieldName) const {
                // Convenience overload: addresses the field named `fieldName` of the current
                // instance by forwarding getInstance() to the (instance, fieldName) overload.
     104        return getScalarFieldPtr(getInstance(), fieldName);
     105    }
     106
     107    llvm::Value * getScalarField(const std::string & fieldName) const {
                // Emits a load (iBuilder->CreateLoad) from the pointer to the named field
                // of the current instance and returns the loaded value.
     108        return iBuilder->CreateLoad(getScalarFieldPtr(fieldName));
     109    }
     110
     111    llvm::Value * getScalarField(llvm::Value * index) const {
                // Emits a load (iBuilder->CreateLoad) from the pointer to field `index`
                // of the current instance and returns the loaded value.
     112        return iBuilder->CreateLoad(getScalarFieldPtr(index));
     113    }
     114
     115    // Set the value of a scalar field for the current instance.
     116    void setScalarField(const std::string & fieldName, llvm::Value * value) const {
                // Emits a store (iBuilder->CreateStore) of `value` into the named field
                // of the current instance.
     117        iBuilder->CreateStore(value, getScalarFieldPtr(fieldName));
     118    }
     119
     120    void setScalarField(llvm::Value * index, llvm::Value * value) const {
                // Emits a store (iBuilder->CreateStore) of `value` into field `index`
                // of the current instance.
     121        iBuilder->CreateStore(value, getScalarFieldPtr(index));
     122    }
     123
    100124
    101125    // Synchronization actions for executing a kernel for a particular logical segment.
     
    108132    // data has been extracted from the kernel for further pipeline processing, the
    109133    // segment number must be incremented and stored using releaseLogicalSegmentNo.
    110     llvm::LoadInst * acquireLogicalSegmentNo(llvm::Value * instance) const;
    111 
    112     void releaseLogicalSegmentNo(llvm::Value * instance, llvm::Value * newFieldVal) const;
     134    llvm::LoadInst * acquireLogicalSegmentNo() const;
     135
     136    void releaseLogicalSegmentNo(llvm::Value * nextSegNo) const;
     137
     138    llvm::Value * getConsumerState(const std::string & name) const;
    113139
    114140    // Get a parameter by name.
     
    126152        return getBuilder()->getStreamSetTy(NumElements, FieldWidth);
    127153    }
    128    
     154       
     155    const StreamSetBuffers & getStreamSetInputBuffers() const { return mStreamSetInputBuffers; }
     156
     157    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const { return mStreamSetInputBuffers[i]; }
     158
     159    const StreamSetBuffers & getStreamSetOutputBuffers() const { return mStreamSetOutputBuffers; }
     160
     161    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const { return mStreamSetOutputBuffers[i]; }
     162
     163    llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args) const;
     164
     165    llvm::CallInst * createGetAccumulatorCall(llvm::Value * self, const std::string & accumName) const;
     166
    129167    virtual ~KernelBuilder() = 0;
    130    
    131     const std::vector<const parabix::StreamSetBuffer *> & getStreamSetInputBuffers() const { return mStreamSetInputBuffers; }
    132 
    133     const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const { return mStreamSetInputBuffers[i]; }
    134 
    135     const std::vector<const parabix::StreamSetBuffer *> & getStreamSetOutputBuffers() const { return mStreamSetOutputBuffers; }
    136 
    137     const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const { return mStreamSetOutputBuffers[i]; }
    138 
    139     llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args) const;
    140 
    141     llvm::CallInst * createGetAccumulatorCall(llvm::Value * self, const std::string & accumName) const;
    142168
    143169protected:
     
    181207    unsigned addUnnamedScalar(llvm::Type * type);
    182208
    183     unsigned getScalarCount() const;
    184 
    185209    // Run-time access of Kernel State and parameters of methods for
    186210    // use in implementing kernels.
     
    189213    llvm::ConstantInt * getScalarIndex(const std::string & name) const;
    190214
    191     // Get the value of a scalar field for a given instance.
    192     llvm::Value * getScalarField(const std::string & fieldName) const {
    193         return getScalarField(getSelf(), fieldName);
    194     }
    195 
    196     llvm::Value * getScalarField(llvm::Value * index) const {
    197         return getScalarField(getSelf(), index);
    198     }
    199 
    200     // Set the value of a scalar field for a given instance.
    201     void setScalarField(const std::string & fieldName, llvm::Value * value) const {
    202         return setScalarField(getSelf(), fieldName, value);
    203     }
    204 
    205     void setScalarField(llvm::Value * index, llvm::Value * value) const {
    206         return setScalarField(getSelf(), index, value);
    207     }
    208 
    209215    llvm::Value * getInputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex) const;
    210216
     
    241247    void reserveBytes(const std::string & name, llvm::Value * requested) const;
    242248
    243     llvm::Value * getScalarFieldPtr(const std::string & name) const {
    244         return getScalarFieldPtr(getSelf(), name);
    245     }
    246 
    247     llvm::Value * getScalarFieldPtr(llvm::Value * index) const {
    248         return getScalarFieldPtr(getSelf(), index);
    249     }
    250 
    251     inline llvm::Value * getProducedItemCount(const std::string & name) const {
    252         return getProducedItemCount(getSelf(), name);
    253     }
    254 
    255     inline void setProducedItemCount(const std::string & name, llvm::Value * value) const {
    256         setProducedItemCount(getSelf(), name, value);
    257     }
    258 
    259249    llvm::Value * getAvailableItemCount(const std::string & name) const;
    260250
    261     inline llvm::Value * getProcessedItemCount(const std::string & name) const {
    262         return getProcessedItemCount(getSelf(), name);
    263     }
    264 
    265     inline void setProcessedItemCount(const std::string & name, llvm::Value * value) const {
    266         setProcessedItemCount(getSelf(), name, value);
    267     }
    268 
    269     llvm::Value * getConsumedItemCount(const std::string & name) const;
    270 
    271     llvm::Value * getTerminationSignal() const {
    272         return getTerminationSignal(getSelf());
    273     }
    274 
    275     void setTerminationSignal() const {
    276         return setTerminationSignal(getSelf());
    277     }
    278 
    279     llvm::Value * getSelf() const {
    280         return mSelf;
    281     }
    282 
    283251    llvm::BasicBlock * CreateBasicBlock(std::string && name) const;
    284252
     
    287255    llvm::Value * getStreamSetBufferPtr(const std::string & name) const;
    288256
    289     llvm::Value * getScalarFieldPtr(llvm::Value * instance, const std::string & name) const;
    290 
    291     llvm::Value * getScalarFieldPtr(llvm::Value * instance, llvm::Value * index) const;
     257    llvm::Value * getScalarFieldPtr(llvm::Value * const instance, llvm::Value * index) const {
     258        assert ("instance cannot be null!" && instance);
     259        return iBuilder->CreateGEP(getInstance(), {iBuilder->getInt32(0), index});
     260    }
     261
     262    llvm::Value * getScalarFieldPtr(llvm::Value * const instance, const std::string & fieldName) const {
                // Name-based overload: translates `fieldName` to its index with
                // getScalarIndex(...) and forwards to the (instance, index) overload.
     263        return getScalarFieldPtr(instance, getScalarIndex(fieldName));
     264    }
    292265
    293266    StreamPort getStreamPort(const std::string & name) const;
     
    311284    void callGenerateDoSegmentMethod();
    312285
     286
    313287private:
    314288
     289    void setConsumerState(const std::string & name, llvm::Value * value) const;
     290
    315291    llvm::Value * computeBlockIndex(const std::vector<Binding> & binding, const std::string & name, llvm::Value * itemCount) const;
    316292
    317293protected:
    318294
    319     llvm::Value *                                   mSelf;
    320     llvm::Function *                                mCurrentMethod;
    321 
    322     std::vector<llvm::Type *>                       mKernelFields;
    323     KernelMap                                       mKernelMap;
    324     StreamMap                                       mStreamMap;
    325     std::vector<const parabix::StreamSetBuffer *>   mStreamSetInputBuffers;
    326     std::vector<const parabix::StreamSetBuffer *>   mStreamSetOutputBuffers;
    327     bool                                            mNoTerminateAttribute;
    328     bool                                            mIsGenerated;
     295    llvm::Function *                mCurrentMethod;
     296    std::vector<llvm::Type *>       mKernelFields;
     297    KernelMap                       mKernelMap;
     298    StreamMap                       mStreamMap;
     299    StreamSetBuffers                mStreamSetInputBuffers;
     300    StreamSetBuffers                mStreamSetOutputBuffers;
     301    bool                            mNoTerminateAttribute;
     302    bool                            mIsGenerated;
    329303
    330304};
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5407 r5408  
    4343void generateSegmentParallelPipeline(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels) {
    4444
    45     const unsigned n = kernels.size(); assert (n > 0);
     45    const unsigned n = kernels.size();
    4646    Module * const m = iBuilder->getModule();
    4747    IntegerType * const sizeTy = iBuilder->getSizeTy();
     
    5151    Constant * nullVoidPtrVal = ConstantPointerNull::getNullValue(voidPtrTy);
    5252
     53    assert (!kernels.empty());
     54
    5355    std::vector<Type *> structTypes;
    54     for (unsigned i = 0; i < n; i++) {
    55         kernels[i]->createInstance();
    56         structTypes.push_back(kernels[i]->getInstance()->getType());
     56
     57    Value * instance[n];
     58    for (unsigned i = 0; i < n; ++i) {
     59        instance[i] = kernels[i]->getInstance();
     60        structTypes.push_back(instance[i]->getType());
    5761    }
    5862    StructType * const sharedStructType = StructType::get(m->getContext(), structTypes);
    5963    StructType * const threadStructType = StructType::get(sharedStructType->getPointerTo(), sizeTy, nullptr);
     64
     65    Function * const threadFunc = makeThreadFunction("segment", m);
    6066
    6167    // -------------------------------------------------------------------------------------------------------------------------
     
    6369    // -------------------------------------------------------------------------------------------------------------------------
    6470    const auto ip = iBuilder->saveIP();
    65     Function * const threadFunc = makeThreadFunction("sppt", m);
    6671
    6772     // Create the basic blocks for the thread function.
     
    7176    Value * const threadStruct = iBuilder->CreatePointerCast(input, threadStructType->getPointerTo());
    7277    Value * const sharedStatePtr = iBuilder->CreateLoad(iBuilder->CreateGEP(threadStruct, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
    73     Value * instance[n];
    74     for (unsigned k = 0; k < n; k++) {
    75         Value * ptr = iBuilder->CreateGEP(sharedStatePtr, {iBuilder->getInt32(0), iBuilder->getInt32(k)});
    76         instance[k] = iBuilder->CreateLoad(ptr);
     78    for (unsigned k = 0; k < n; ++k) {
     79        Value * ptr = iBuilder->CreateLoad(iBuilder->CreateGEP(sharedStatePtr, {iBuilder->getInt32(0), iBuilder->getInt32(k)}));
     80        kernels[k]->setInstance(ptr);
    7781    }
    7882    Value * const segOffset = iBuilder->CreateLoad(iBuilder->CreateGEP(threadStruct, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
     
    8892    Value * const nextSegNo = iBuilder->CreateAdd(segNo, iBuilder->getSize(1));
    8993
    90     BasicBlock * segmentWait = BasicBlock::Create(iBuilder->getContext(), kernels[0]->getName() + "Wait", threadFunc);
    91 
    9294    BasicBlock * segmentLoopBody = nullptr;
    93 
    9495    BasicBlock * const exitThreadBlock = BasicBlock::Create(iBuilder->getContext(), "exitThread", threadFunc);
    9596
    96     iBuilder->CreateBr(segmentWait);
    97 
    9897    StreamSetBufferMap<Value *> producedPos;
    9998
    100     for (unsigned k = 0;;) {
     99    for (unsigned k = 0; k < n; ++k) {
     100
    101101        const auto & kernel = kernels[k];
     102
     103        BasicBlock * const segmentWait = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Wait", threadFunc);
     104        iBuilder->CreateBr(segmentWait);
     105
     106        segmentLoopBody = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Do", threadFunc);
     107
     108        iBuilder->SetInsertPoint(segmentWait);
    102109        const unsigned waitIdx = codegen::DebugOptionIsSet(codegen::SerializeThreads) ? (n - 1) : k;
    103 
    104         segmentLoopBody = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Do", threadFunc);
    105 
    106         iBuilder->SetInsertPoint(segmentWait);
    107         Value * const processedSegmentCount = kernels[waitIdx]->acquireLogicalSegmentNo(instance[waitIdx]);
     110        Value * const processedSegmentCount = kernels[waitIdx]->acquireLogicalSegmentNo();
    108111        assert (processedSegmentCount->getType() == segNo->getType());
    109112        Value * const ready = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
     
    115118            BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Exit", threadFunc, 0);
    116119            iBuilder->CreateCondBr(ready, completionTest, segmentWait);
     120
    117121            iBuilder->SetInsertPoint(completionTest);
    118             Value * alreadyDone = kernel->getTerminationSignal(instance[k]);
    119             iBuilder->CreateCondBr(alreadyDone, exitBlock, segmentLoopBody);
     122            iBuilder->CreateCondBr(kernel->getTerminationSignal(), exitBlock, segmentLoopBody);
    120123            iBuilder->SetInsertPoint(exitBlock);
    121124            // Ensure that the next thread will also exit.
    122             kernel->releaseLogicalSegmentNo(instance[k], nextSegNo);
     125            kernel->releaseLogicalSegmentNo(nextSegNo);
    123126            iBuilder->CreateBr(exitThreadBlock);
    124127        }
     
    128131        const auto & inputs = kernel->getStreamInputs();
    129132        const auto & outputs = kernel->getStreamOutputs();
    130         std::vector<Value *> args = {instance[k], doFinal};
     133        std::vector<Value *> args = {kernel->getInstance(), doFinal};
    131134        for (unsigned i = 0; i < inputs.size(); ++i) {
    132135            const auto f = producedPos.find(kernel->getStreamSetInputBuffer(i));
     
    136139            args.push_back(f->second);
    137140        }
    138         for (unsigned i = 0; i < outputs.size(); ++i) {
    139             args.push_back(iBuilder->getSize(0));
    140         }
     141
    141142        CallInst * ci = kernel->createDoSegmentCall(args);
    142143        // TODO: investigate whether this actually inlines the function call correctly despite being in a separate module.
     
    144145
    145146        if (!kernel->hasNoTerminateAttribute()) {
    146             doFinal = iBuilder->CreateOr(doFinal, kernel->getTerminationSignal(instance[k]));
    147         }
    148         for (unsigned i = 0; i < outputs.size(); i++) {
    149             Value * const produced = kernel->getProducedItemCount(instance[k], outputs[i].name, doFinal);
     147            doFinal = iBuilder->CreateOr(doFinal, kernel->getTerminationSignal());
     148        }
     149        for (unsigned i = 0; i < outputs.size(); ++i) {
     150            Value * const produced = kernel->getProducedItemCount(outputs[i].name, doFinal);
    150151            const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(i);
    151152            assert (producedPos.count(buf) == 0);
     
    153154        }
    154155
    155         kernel->releaseLogicalSegmentNo(instance[k], nextSegNo);
    156         if (LLVM_UNLIKELY(++k == n)) {
    157             assert (segmentLoopBody);
    158             exitThreadBlock->moveAfter(segmentLoopBody);
    159             segNo->addIncoming(iBuilder->CreateAdd(segNo, iBuilder->getSize(threads)), segmentLoopBody);
    160             iBuilder->CreateCondBr(doFinal, exitThreadBlock, segmentLoop);
    161 
    162             iBuilder->SetInsertPoint(exitThreadBlock);
    163             iBuilder->CreatePThreadExitCall(nullVoidPtrVal);
    164             iBuilder->CreateRetVoid();
    165             break;
    166         } else {
    167             segmentWait = BasicBlock::Create(iBuilder->getContext(), kernels[k]->getName() + "Wait", threadFunc);
    168             iBuilder->CreateBr(segmentWait);
    169         }
    170     }
     156        kernel->releaseLogicalSegmentNo(nextSegNo);
     157    }
     158
     159    assert (segmentLoopBody);
     160    exitThreadBlock->moveAfter(segmentLoopBody);
     161    segNo->addIncoming(iBuilder->CreateAdd(segNo, iBuilder->getSize(threads)), segmentLoopBody);
     162    iBuilder->CreateCondBr(doFinal, exitThreadBlock, segmentLoop);
     163
     164    iBuilder->SetInsertPoint(exitThreadBlock);
     165    iBuilder->CreatePThreadExitCall(nullVoidPtrVal);
     166    iBuilder->CreateRetVoid();
    171167
    172168    // -------------------------------------------------------------------------------------------------------------------------
    173169    iBuilder->restoreIP(ip);
     170
     171    for (unsigned i = 0; i < n; ++i) {
     172        kernels[i]->setInstance(instance[i]);
     173    }
    174174
    175175    // -------------------------------------------------------------------------------------------------------------------------
    176176    // MAKE SEGMENT PARALLEL PIPELINE DRIVER
    177177    // -------------------------------------------------------------------------------------------------------------------------
    178 
    179178    Type * const pthreadsTy = ArrayType::get(sizeTy, threads);
    180179    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
    181180    Value * threadIdPtr[threads];
    182     for (unsigned i = 0; i < threads; i++) {
     181
     182    for (unsigned i = 0; i < threads; ++i) {
    183183        threadIdPtr[i] = iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
    184184    }
    185185
    186     for (unsigned i = 0; i < n; i++) {
    187         kernels[i]->releaseLogicalSegmentNo(kernels[i]->getInstance(), iBuilder->getSize(0));
     186    for (unsigned i = 0; i < n; ++i) {
     187        kernels[i]->releaseLogicalSegmentNo(iBuilder->getSize(0));
    188188    }
    189189
    190190    AllocaInst * const sharedStruct = iBuilder->CreateCacheAlignedAlloca(sharedStructType);
    191     for (unsigned i = 0; i < n; i++) {
     191    for (unsigned i = 0; i < n; ++i) {
    192192        Value * ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
    193193        iBuilder->CreateStore(kernels[i]->getInstance(), ptr);
    194194    }
    195195
    196     for (unsigned i = 0; i < threads; i++) {
     196    for (unsigned i = 0; i < threads; ++i) {
    197197        AllocaInst * threadState = iBuilder->CreateAlloca(threadStructType);
    198198        Value * const sharedStatePtr = iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     
    204204
    205205    AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
    206     for (unsigned i = 0; i < threads; i++) {
     206    for (unsigned i = 0; i < threads; ++i) {
    207207        Value * threadId = iBuilder->CreateLoad(threadIdPtr[i]);
    208208        iBuilder->CreatePThreadJoinCall(threadId, status);
     
    214214 * @brief generateParallelPipeline
    215215 ** ------------------------------------------------------------------------------------------------------------- */
    216 void generateParallelPipeline(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels) {
     216void generateParallelPipeline(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> &kernels) {
    217217
    218218    Module * const m = iBuilder->getModule();
     
    224224    Constant * const nullVoidPtrVal = ConstantPointerNull::getNullValue(voidPtrTy);
    225225
    226     for (auto & k : kernels) {
    227         k->createInstance();
    228     }
    229 
    230226    const unsigned n = kernels.size();
    231227
     
    233229    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
    234230    Value * threadIdPtr[n];
    235     for (unsigned i = 0; i < n; i++) {
     231    for (unsigned i = 0; i < n; ++i) {
    236232        threadIdPtr[i] = iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
    237233    }
    238234
     235    Value * instance[n];
    239236    Type * structTypes[n];
    240     for (unsigned i = 0; i < n; i++) {
    241         structTypes[i] = kernels[i]->getInstance()->getType();
    242     }
     237    for (unsigned i = 0; i < n; ++i) {
     238        instance[i] = kernels[i]->getInstance();
     239        structTypes[i] = instance[i]->getType();
     240    }
     241
    243242    Type * const sharedStructType = StructType::get(m->getContext(), ArrayRef<Type *>{structTypes, n});
     243
     244
    244245    AllocaInst * sharedStruct = iBuilder->CreateAlloca(sharedStructType);
    245     for (unsigned i = 0; i < n; i++) {
     246    for (unsigned i = 0; i < n; ++i) {
    246247        Value * ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
    247         iBuilder->CreateStore(kernels[i]->getInstance(), ptr);
    248     }
     248        iBuilder->CreateStore(instance[i], ptr);
     249    }
     250
    249251    for (auto & kernel : kernels) {
    250         kernel->releaseLogicalSegmentNo(kernel->getInstance(), iBuilder->getSize(0));
     252        kernel->releaseLogicalSegmentNo(iBuilder->getSize(0));
    251253    }
    252254
     
    288290
    289291    Function * thread_functions[n];
     292    Value * producerSegNo[n];
    290293    for (unsigned id = 0; id < n; id++) {
    291294        const auto & kernel = kernels[id];
    292295        const auto & inputs = kernel->getStreamInputs();
    293         const auto & outputs = kernel->getStreamOutputs();
    294296
    295297        Function * const threadFunc = makeThreadFunction("ppt:" + kernel->getName(), m);
     
    306308        Value * sharedStruct = iBuilder->CreateBitCast(&threadFunc->getArgumentList().front(), sharedStructType->getPointerTo());
    307309
    308         Value * instancePtrs[n];
    309310        for (unsigned k = 0; k < n; k++) {
    310311            Value * const ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(k)});
    311             instancePtrs[k] = iBuilder->CreateLoad(ptr);
     312            kernels[k]->setInstance(iBuilder->CreateLoad(ptr));
    312313        }
    313314
     
    327328        }
    328329        for (unsigned k : kernelSet) {
    329             Value * consumerSegNo = kernels[k]->acquireLogicalSegmentNo(instancePtrs[k]);
     330            Value * consumerSegNo = kernels[k]->acquireLogicalSegmentNo();
    330331            assert (consumerSegNo->getType() == segNo->getType());
    331332            Value * consumedSegNo = iBuilder->CreateAdd(consumerSegNo, bufferSegments);
     
    340341            kernelSet.insert(producingKernel[buf]);
    341342        }
    342         const auto m = kernelSet.size();
    343         Value * producerSegNo[m];
    344343
    345344        Value * inputWaitCond = iBuilder->getTrue();
    346         if (m) {
    347             unsigned j = 0;
    348             for (unsigned k : kernelSet) {
    349                 producerSegNo[j] = kernels[k]->acquireLogicalSegmentNo(instancePtrs[k]);
    350                 assert (producerSegNo[j]->getType() == segNo->getType());
    351                 inputWaitCond = iBuilder->CreateAnd(inputWaitCond, iBuilder->CreateICmpULT(segNo, producerSegNo[j++]));
    352             }
     345        for (unsigned k : kernelSet) {
     346            producerSegNo[k] = kernels[k]->acquireLogicalSegmentNo();
     347            assert (producerSegNo[k]->getType() == segNo->getType());
     348            inputWaitCond = iBuilder->CreateAnd(inputWaitCond, iBuilder->CreateICmpULT(segNo, producerSegNo[k]));
    353349        }
    354350        iBuilder->CreateCondBr(inputWaitCond, doSegmentBlock, inputCheckBlock);
     
    358354
    359355        Value * const nextSegNo = iBuilder->CreateAdd(segNo, iBuilder->getSize(1));
    360 
    361356        Value * terminated = nullptr;
    362         if (m == 0) {
     357        if (kernelSet.empty()) {
    363358            // if this kernel has no input streams, the kernel itself must decide when it terminates.
    364             terminated = kernel->getTerminationSignal(instancePtrs[id]);
     359            terminated = kernel->getTerminationSignal();
    365360        } else {
    366361            // ... otherwise the kernel terminates only when it exhausts all of its input streams
    367362            terminated = iBuilder->getTrue();
    368             unsigned j = 0;
    369363            for (unsigned k : kernelSet) {
    370                 terminated = iBuilder->CreateAnd(terminated, kernels[k]->getTerminationSignal(instancePtrs[k]));
    371                 terminated = iBuilder->CreateAnd(terminated, iBuilder->CreateICmpEQ(nextSegNo, producerSegNo[j++]));
     364                terminated = iBuilder->CreateAnd(terminated, kernels[k]->getTerminationSignal());
     365                terminated = iBuilder->CreateAnd(terminated, iBuilder->CreateICmpEQ(nextSegNo, producerSegNo[k]));
    372366            }
    373367            kernelSet.clear();
    374368        }
    375369
    376         std::vector<Value *> args = {instancePtrs[id], terminated};
     370        std::vector<Value *> args = {kernel->getInstance(), terminated};
    377371        args.insert(args.end(), inputs.size(), iBuilder->CreateMul(segmentItems, segNo));
    378         args.insert(args.end(), outputs.size(), iBuilder->getSize(0));
    379372
    380373        kernel->createDoSegmentCall(args);
    381374        segNo->addIncoming(nextSegNo, doSegmentBlock);
    382         kernel->releaseLogicalSegmentNo(instancePtrs[id], nextSegNo);
     375        kernel->releaseLogicalSegmentNo(nextSegNo);
    383376
    384377        iBuilder->CreateCondBr(terminated, exitThreadBlock, outputCheckBlock);
     
    393386    iBuilder->restoreIP(ip);
    394387
    395     for (unsigned i = 0; i < n; i++) {
     388    for (unsigned i = 0; i < n; ++i) {
     389        kernels[i]->setInstance(instance[i]);
     390    }
     391
     392    for (unsigned i = 0; i < n; ++i) {
    396393        iBuilder->CreatePThreadCreateCall(threadIdPtr[i], nullVoidPtrVal, thread_functions[i], sharedStruct);
    397394    }
    398395
    399396    AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
    400     for (unsigned i = 0; i < n; i++) {
     397    for (unsigned i = 0; i < n; ++i) {
    401398        Value * threadId = iBuilder->CreateLoad(threadIdPtr[i]);
    402399        iBuilder->CreatePThreadJoinCall(threadId, status);
     
    410407void generatePipelineLoop(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels) {
    411408
    412     for (auto & k : kernels) {
    413         k->createInstance();
    414     }
    415409    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    416410    Function * main = entryBlock->getParent();
     411
     412    assert (!kernels.empty());
    417413
    418414    // Create the basic blocks for the loop.
     
    421417
    422418    StreamSetBufferMap<Value *> producedPos;
    423     StreamSetBufferMap<std::pair<PHINode *, Value *>> consumedPos;
    424419
    425420    iBuilder->CreateBr(pipelineLoop);
    426421    iBuilder->SetInsertPoint(pipelineLoop);
    427422
    428     // Build up the initial phi nodes for each of the consumed (minimum processed) positions
    429     for (auto & kernel : kernels) {
    430         const auto & outputs = kernel->getStreamOutputs();
    431         for (unsigned i = 0; i < outputs.size(); ++i) {
    432             const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(i);
    433             if (LLVM_UNLIKELY(consumedPos.count(buf) != 0)) {
    434                 report_fatal_error(kernel->getName() + " redefines stream set " + outputs[i].name);
    435             }
    436             PHINode * phi = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    437             phi->addIncoming(iBuilder->getSize(0), entryBlock);
    438             consumedPos.emplace(buf, std::make_pair(phi, nullptr));
    439         }
    440     }
    441 
    442423    Value * terminated = iBuilder->getFalse();
    443424    for (auto & kernel : kernels) {
    444         Value * const instance = kernel->getInstance();
    445425        const auto & inputs = kernel->getStreamInputs();
    446         const auto & outputs = kernel->getStreamOutputs();
    447         std::vector<Value *> args = {instance, terminated};
     426        std::vector<Value *> args = {kernel->getInstance(), terminated};
    448427        for (unsigned i = 0; i < inputs.size(); ++i) {
    449428            const auto f = producedPos.find(kernel->getStreamSetInputBuffer(i));
     
    453432            args.push_back(f->second);
    454433        }
    455         for (unsigned i = 0; i < outputs.size(); ++i) {
    456             const auto f = consumedPos.find(kernel->getStreamSetOutputBuffer(i));
    457             assert (f != consumedPos.end());
    458             args.push_back(std::get<0>(f->second));
    459         }
    460434        kernel->createDoSegmentCall(args);
    461435        if (!kernel->hasNoTerminateAttribute()) {
    462             terminated = iBuilder->CreateOr(terminated, kernel->getTerminationSignal(instance));
    463         }
    464         for (unsigned i = 0; i < outputs.size(); i++) {
    465             Value * const produced = kernel->getProducedItemCount(instance, outputs[i].name, terminated);
     436            terminated = iBuilder->CreateOr(terminated, kernel->getTerminationSignal());
     437        }
     438        const auto & outputs = kernel->getStreamOutputs();
     439        for (unsigned i = 0; i < outputs.size(); ++i) {
     440            Value * const produced = kernel->getProducedItemCount(outputs[i].name, terminated);
    466441            const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(i);
    467442            assert (producedPos.count(buf) == 0);
    468443            producedPos.emplace(buf, produced);
    469444        }
    470         for (unsigned i = 0; i < inputs.size(); i++) {
    471             Value * const processed = kernel->getProcessedItemCount(instance, inputs[i].name);
    472             const StreamSetBuffer * const buf = kernel->getStreamSetInputBuffer(i);
    473             const auto f = consumedPos.find(buf);
    474             assert (f != consumedPos.end());
    475             Value *& consumed = std::get<1>(f->second);
    476             if (consumed) {
    477                 consumed = iBuilder->CreateSelect(iBuilder->CreateICmpULT(processed, consumed), processed, consumed);
    478             } else {
    479                 consumed = processed;
    480             }
    481         }
    482         Value * const segNo = kernel->acquireLogicalSegmentNo(instance);
    483         kernel->releaseLogicalSegmentNo(instance, iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
    484     }
    485     // update the consumed position phi nodes with the last min processed count of each input stream
    486     for (const auto entry : consumedPos) {
    487         PHINode * const phi = std::get<0>(entry.second);
    488         Value * const value = std::get<1>(entry.second);
    489         phi->addIncoming(value ? value : phi, pipelineLoop);
    490     }
     445
     446        Value * const segNo = kernel->acquireLogicalSegmentNo();
     447        kernel->releaseLogicalSegmentNo(iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
     448    }
     449
    491450    iBuilder->CreateCondBr(terminated, pipelineExit, pipelineLoop);
    492451    iBuilder->SetInsertPoint(pipelineExit);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.h

    r5363 r5408  
    1111
    1212void generateSegmentParallelPipeline(IDISA::IDISA_Builder * iBuilder, const std::vector<kernel::KernelBuilder *> & kernels);
    13 
    1413void generatePipelineLoop(IDISA::IDISA_Builder * iBuilder, const std::vector<kernel::KernelBuilder *> & kernels);
    15 
    1614void generateParallelPipeline(IDISA::IDISA_Builder * iBuilder, const std::vector<kernel::KernelBuilder *> & kernels);
    1715
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.cpp

    r5402 r5408  
    3434
    3535    iBuilder->SetInsertPoint(readBlock);
     36
     37
     38
     39
    3640    // how many pages are required to have enough data for the segment plus one overflow block?
    3741    const auto PageAlignedSegmentSize = round_up_to_nearest((mSegmentBlocks + 1) * iBuilder->getBitBlockWidth() * (mCodeUnitWidth / 8), getpagesize());
     
    4145
    4246    Value * const ptr = getRawOutputPointer("InputStream", iBuilder->getInt32(0), bufferedSize);
     47
    4348    Value * const bytePtr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
    4449    Value * const bytesRead = iBuilder->CreateReadCall(iBuilder->getInt32(STDIN_FILENO), bytePtr, bytesToRead);
     
    4752    bufferedSize = iBuilder->CreateAdd(bufferedSize, bytesRead);
    4853    setBufferedSize("InputStream", bufferedSize);
     54
    4955    iBuilder->CreateUnlikelyCondBr(iBuilder->CreateICmpULT(unreadSize, segmentSize), setTermination, stdInExit);
    5056
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5398 r5408  
    1414#include <llvm/Support/raw_ostream.h>
    1515#include <llvm/IR/CFG.h>
     16#include <kernels/kernel.h>
    1617
    1718namespace llvm { class Constant; }
     
    221222
    222223void ExtensibleBuffer::reserveBytes(Value * const self, llvm::Value * const requiredSize) const {
     224
     225    // TODO: tweak this function to allow AlignedMalloc to begin copying prior to waiting for the
     226    // consumers to finish. MRemap could be used with the "do not move" flag set safely.
     227
    223228    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    224229    Value * const currentSize = iBuilder->CreateLoad(capacityPtr);
    225230    BasicBlock * const entry = iBuilder->GetInsertBlock();
    226     BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
    227     BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
    228     iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(requiredSize, currentSize), resume, expand);
     231    Function * const parent = entry->getParent();
     232    IntegerType * const sizeTy = iBuilder->getSizeTy();
     233    ConstantInt * const zero = iBuilder->getInt32(0);
     234    ConstantInt * const one = iBuilder->getInt32(1);
     235
     236    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", parent);
     237    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", parent);
     238
     239    Value * noExpansionNeeded = iBuilder->CreateICmpULT(requiredSize, currentSize);
     240
     241    kernel::KernelBuilder * const kernel = getProducer();
     242    auto consumers = kernel->getStreamOutputs();
     243    if (LLVM_UNLIKELY(consumers.empty())) {
     244        iBuilder->CreateLikelyCondBr(noExpansionNeeded, resume, expand);
     245    } else { // we cannot risk expanding this buffer until all of the consumers have finished reading the data
     246
     247        ConstantInt * const zeroSz = iBuilder->getSize(0);
     248        Value * const segNo = kernel->acquireLogicalSegmentNo();
     249        const auto n = consumers.size();
     250
     251        BasicBlock * load[n + 1];
     252        BasicBlock * wait[n];
     253        for (unsigned i = 0; i < n; ++i) {
     254            load[i] = BasicBlock::Create(iBuilder->getContext(), consumers[i].name + "Load", parent);
     255            wait[i] = BasicBlock::Create(iBuilder->getContext(), consumers[i].name + "Wait", parent);
     256        }
     257        load[n] = expand;
     258        iBuilder->CreateLikelyCondBr(noExpansionNeeded, resume, load[0]);
     259
     260        for (unsigned i = 0; i < n; ++i) {
     261
     262            iBuilder->SetInsertPoint(load[i]);
     263            Value * const outputConsumers = kernel->getConsumerState(consumers[i].name);
     264            Value * const consumerCount = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, zero}));
     265            Value * const consumerPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, one}));
     266            Value * const noConsumers = iBuilder->CreateICmpEQ(consumerCount, zeroSz);
     267            iBuilder->CreateUnlikelyCondBr(noConsumers, load[i + 1], wait[i]);
     268
     269            iBuilder->SetInsertPoint(wait[i]);
     270            PHINode * const consumerPhi = iBuilder->CreatePHI(sizeTy, 2);
     271            consumerPhi->addIncoming(zeroSz, load[i]);
     272
     273            Value * const conSegPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(consumerPtr, consumerPhi));
     274            Value * const processedSegmentCount = iBuilder->CreateAtomicLoadAcquire(conSegPtr);
     275            Value * const ready = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
     276            Value * const nextConsumerIdx = iBuilder->CreateAdd(consumerPhi, iBuilder->CreateZExt(ready, sizeTy));
     277            consumerPhi->addIncoming(nextConsumerIdx, wait[i]);
     278            Value * const next = iBuilder->CreateICmpEQ(nextConsumerIdx, consumerCount);
     279            iBuilder->CreateCondBr(next, load[i + 1], wait[i]);
     280
     281        }
     282        expand->moveAfter(wait[n - 1]);
     283        resume->moveAfter(expand);
     284    }
    229285    iBuilder->SetInsertPoint(expand);
    230286    Value * const reservedSize = iBuilder->CreateShl(requiredSize, 1);
    231287#ifdef __APPLE__
    232288    Value * newAddr = iBuilder->CreateAlignedMalloc(reservedSize, iBuilder->getCacheAlignment());
    233     Value * const baseAddrPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     289    Value * const baseAddrPtr = iBuilder->CreateGEP(self, {zero, one});
    234290    Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
    235291    iBuilder->CreateMemCpy(newAddr, baseAddr, currentSize, iBuilder->getCacheAlignment());
     
    239295    newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
    240296#else
    241     Value * const baseAddrPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     297    Value * const baseAddrPtr = iBuilder->CreateGEP(self, {zero, one});
    242298    Value * const baseAddr = iBuilder->CreateLoad(baseAddrPtr);
    243299    Value * newAddr = iBuilder->CreateMRemap(baseAddr, currentSize, reservedSize);
    244300    newAddr = iBuilder->CreatePointerCast(newAddr, baseAddr->getType());
    245301#endif
     302    iBuilder->CreateStore(newAddr, baseAddrPtr);
    246303    iBuilder->CreateStore(reservedSize, capacityPtr);
    247     iBuilder->CreateStore(newAddr, baseAddrPtr);
    248304    iBuilder->CreateBr(resume);
    249305    iBuilder->SetInsertPoint(resume);
     
    261317
    262318Value * ExtensibleBuffer::getBaseAddress(Value * const self) const {
    263     return iBuilder->CreateLoad(iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
     319    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     320    return iBuilder->CreateLoad(ptr);
    264321}
    265322
     
    577634, mAddressSpace(AddressSpace)
    578635, mStreamSetBufferPtr(nullptr)
    579 , mBaseType(baseType) {
     636, mBaseType(baseType)
     637, mProducer(nullptr) {
    580638
    581639}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5398 r5408  
    8686    virtual ~StreamSetBuffer() = 0;
    8787
     88    kernel::KernelBuilder * getProducer() const {
     89        return mProducer;
     90    }
     91
     92    const std::vector<kernel::KernelBuilder *> & getConsumers() const {
     93        return mConsumers;
     94    }
     95
    8896protected:
    8997
     
    99107    virtual llvm::Value * getBaseAddress(llvm::Value * self) const;
    100108
    101 protected:
    102     const BufferKind                mBufferKind;
    103     IDISA::IDISA_Builder * const    iBuilder;
    104     llvm::Type * const              mType;
    105     const size_t                    mBufferBlocks;
    106     const unsigned                  mAddressSpace;
    107     llvm::Value *                   mStreamSetBufferPtr;
    108     llvm::Type * const              mBaseType;
    109     std::string                     mUniqueID;
     109    void setProducer(kernel::KernelBuilder * const producer) {
     110        assert (producer);
     111        mProducer = producer;
     112    }
     113
     114    void addConsumer(kernel::KernelBuilder * const consumer) {
     115        assert (consumer);
     116        mConsumers.push_back(consumer);
     117    }
     118
     119protected:
     120    const BufferKind                        mBufferKind;
     121    IDISA::IDISA_Builder * const            iBuilder;
     122    llvm::Type * const                      mType;
     123    const size_t                            mBufferBlocks;
     124    const unsigned                          mAddressSpace;
     125    llvm::Value *                           mStreamSetBufferPtr;
     126    llvm::Type * const                      mBaseType;
     127    std::string                             mUniqueID;
     128    kernel::KernelBuilder *                 mProducer;
     129    std::vector<kernel::KernelBuilder *>    mConsumers;
    110130};   
    111131
  • icGREP/icgrep-devel/icgrep/kernels/toolchain.cpp

    r5407 r5408  
    2727#include <kernels/object_cache.h>
    2828#include <kernels/pipeline.h>
    29 #include <kernels/interface.h>
    3029#include <kernels/kernel.h>
    3130#ifdef CUDA_ENABLED
     
    219218
    220219void ParabixDriver::generatePipelineIR() {
    221     for (auto & kb : mKernelList) {
    222         kb->addKernelDeclarations(mMainModule);
     220    // note: instantiation of all kernels must occur prior to initialization
     221    for (const auto & k : mKernelList) {
     222        k->addKernelDeclarations(mMainModule);
     223    }
     224    for (const auto & k : mKernelList) {
     225        k->createInstance();
     226    }
     227    for (const auto & k : mKernelList) {
     228        k->initializeInstance();
    223229    }
    224230    if (codegen::pipelineParallel) {
Note: See TracChangeset for help on using the changeset viewer.