Changeset 6273


Ignore:
Timestamp:
Jan 5, 2019, 5:35:18 PM (9 months ago)
Author:
nmedfort
Message:

More work on optimization branch. First stage of stateless kernel optimization

Location:
icGREP/icgrep-devel/icgrep
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/block_kernel.cpp

    r6272 r6273  
    143143 ** ------------------------------------------------------------------------------------------------------------- */
    144144void BlockOrientedKernel::incrementCountableItemCounts(const std::unique_ptr<KernelBuilder> & b) {
    145 
    146145    // Update the processed item counts
    147     for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
    148         const Binding & input = mInputStreamSets[i];
     146    for (const Binding & input : getInputStreamSetBindings()) {
    149147        if (isCountable(input)) {
    150148            const ProcessingRate & rate = input.getRate();
     
    153151                offset = b->getSize(ceiling(getUpperBound(input) * getStride()));
    154152            } else { // if (rate.isPopCount() || rate.isNegatedPopCount())
    155                 offset = getPopCountRateItemCount(b, rate, mStrideBlockIndex);
     153                offset = getPopCountRateItemCount(b, rate);
    156154            }
    157155            Value * const initial = b->getProcessedItemCount(input.getName());
     
    160158        }
    161159    }
    162 
    163160    // Update the produced item counts
    164     for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
    165         const Binding & output = mOutputStreamSets[i];
     161    for (const Binding & output : getOutputStreamSetBindings()) {
    166162        if (isCountable(output)) {
    167163            const ProcessingRate & rate = output.getRate();
     
    170166                offset = b->getSize(ceiling(getUpperBound(output) * getStride()));
    171167            } else { // if (rate.isPopCount() || rate.isNegatedPopCount())
    172                 offset = getPopCountRateItemCount(b, rate, mStrideBlockIndex);
     168                offset = getPopCountRateItemCount(b, rate);
    173169            }
    174170            Value * const initial = b->getProducedItemCount(output.getName());
     
    178174    }
    179175}
     176
     177/** ------------------------------------------------------------------------------------------------------------- *
     178 * @brief getPopCountRateItemCount
     179 ** ------------------------------------------------------------------------------------------------------------- */
     180Value * BlockOrientedKernel::getPopCountRateItemCount(const std::unique_ptr<KernelBuilder> & b,
     181                                                      const ProcessingRate & rate) {
     182    assert (rate.isPopCount() || rate.isNegatedPopCount());
     183    Port refPort;
     184    unsigned refIndex = 0;
     185    std::tie(refPort, refIndex) = getStreamPort(rate.getReference());
     186    assert (refPort == Port::Input);
     187    Value * array = nullptr;
     188    if (rate.isNegatedPopCount()) {
     189        array = mNegatedPopCountRateArray[refIndex];
     190    } else {
     191        array = mPopCountRateArray[refIndex];
     192    }
     193    assert (array && "missing pop count array attribute");
     194    Value * const currentSum = b->CreateLoad(b->CreateGEP(array, mStrideBlockIndex));
     195    Value * const priorIndex = b->CreateSub(mStrideBlockIndex, b->getSize(1));
     196    Value * const priorSum = b->CreateLoad(b->CreateGEP(array, priorIndex));
     197    return b->CreateSub(currentSum, priorSum);
     198}
     199
    180200/** ------------------------------------------------------------------------------------------------------------- *
    181201 * @brief getRemainingItems
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r6272 r6273  
    5050 * @brief setHandle
    5151 ** ------------------------------------------------------------------------------------------------------------- */
    52 void  Kernel::setHandle(const std::unique_ptr<KernelBuilder> & b, Value * const handle) {
     52void Kernel::setHandle(const std::unique_ptr<KernelBuilder> & b, Value * const handle) {
    5353    assert ("handle cannot be null!" && handle);
    5454    assert ("handle must be a pointer!" && handle->getType()->isPointerTy());
     
    125125 * @brief addScalarToMap
    126126 ** ------------------------------------------------------------------------------------------------------------- */
    127 void Kernel::addScalarToMap(const llvm::StringRef name, const ScalarType scalarType, const unsigned index) {
     127void Kernel::addScalarToMap(const StringRef name, const ScalarType scalarType, const unsigned index) {
    128128    const auto r = mScalarMap.insert(std::make_pair(name, ScalarField{scalarType, index}));
    129129    if (LLVM_UNLIKELY(!r.second)) {
    130130        const ScalarField & sf = r.first->second;
    131         if (LLVM_UNLIKELY(sf.type != scalarType || sf.index != index)) {
     131        if (LLVM_UNLIKELY(sf.Type != scalarType || sf.Index != index)) {
    132132            report_fatal_error(getName() + " already contains scalar " + name);
    133133        }
     
    138138 * @brief addStreamToMap
    139139 ** ------------------------------------------------------------------------------------------------------------- */
    140 void Kernel::addStreamToMap(const llvm::StringRef name, const Port port, const unsigned index) {
     140void Kernel::addStreamToMap(const StringRef name, const Port port, const unsigned index) {
    141141    const auto r = mStreamSetMap.insert(std::make_pair(name, std::make_pair(port, index)));
    142142    if (LLVM_UNLIKELY(!r.second)) {
     
    203203        b->setScalarField(binding.getName(), &*(++args));
    204204    }
    205 
    206205    const auto numOfOutputs = mOutputStreamSets.size();
    207206    for (unsigned i = 0; i < numOfOutputs; i++) {
     
    215214    mTerminationSignalPtr = b->CreateAlloca(b->getInt1Ty(), nullptr, "terminationSignal");
    216215    b->CreateStore(b->getFalse(), mTerminationSignalPtr);
     216    initializeLocalScalarValues(b);
    217217    generateInitializeMethod(b);
    218218    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     
    369369        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
    370370    }
     371
    371372    // NOTE: the disadvantage of passing the stream pointers as a parameter is that it becomes more difficult
    372373    // to access a stream set from an LLVM function call. We could create a stream-set aware function creation
     
    520521
    521522    // initialize the termination signal if this kernel can set it
     523    mTerminationSignalPtr = nullptr;
    522524    if (canTerminate) {
    523525        mTerminationSignalPtr = b->CreateAlloca(b->getInt1Ty(), nullptr, "terminationSignal");
     
    525527    }
    526528
     529    initializeLocalScalarValues(b);
    527530    generateKernelMethod(b);
    528531
     
    546549
    547550    // return the termination signal (if one exists)
    548     if (canTerminate) {
     551    if (mTerminationSignalPtr) {
    549552        b->CreateRet(b->CreateLoad(mTerminationSignalPtr));
    550553        mTerminationSignalPtr = nullptr;
     
    612615        }
    613616    }
     617    initializeLocalScalarValues(b);
    614618    generateFinalizeMethod(b); // may be overridden by the Kernel subtype
    615619    const auto outputs = getFinalOutputScalars(b);
     
    727731    mKernelStateType = mModule->getTypeByName(getName());
    728732    if (LLVM_LIKELY(mKernelStateType == nullptr)) {
    729         std::vector<llvm::Type *> fields;
     733        std::vector<Type *> fields;
    730734        fields.reserve(mInputScalars.size() + mOutputScalars.size() + mInternalScalars.size() + 1);
    731735        for (const Binding & scalar : mInputScalars) {
     
    749753 * @brief addInternalScalar
    750754 ** ------------------------------------------------------------------------------------------------------------- */
    751 void Kernel::addInternalScalar(llvm::Type * type, const llvm::StringRef name) {
     755void Kernel::addInternalScalar(Type * type, const StringRef name) {
    752756    const auto index = mInternalScalars.size();
    753757    mInternalScalars.emplace_back(type, name);
     
    756760
    757761/** ------------------------------------------------------------------------------------------------------------- *
    758  * @brief getScalarIndex
    759  ** ------------------------------------------------------------------------------------------------------------- */
    760 unsigned Kernel::getScalarIndex(const llvm::StringRef fieldName) const {
    761     const auto & field = getScalarField(fieldName);
    762     assert (mKernelStateType);
    763     unsigned index = field.index;
    764     switch (field.type) {
    765         case ScalarType::Internal:
    766             index += mOutputScalars.size();
    767         case ScalarType::Output:
    768             index += mInputScalars.size();
    769         case ScalarType::Input:
    770             break;
    771     }
    772     assert (index < mKernelStateType->getStructNumElements());
    773     return index;
     762 * @brief addLocalScalar
     763 ** ------------------------------------------------------------------------------------------------------------- */
     764void Kernel::addLocalScalar(Type * type, const StringRef name) {
     765    const auto index = mLocalScalars.size();
     766    mLocalScalars.emplace_back(type, name);
     767    addScalarToMap(name, ScalarType::Local, index);
    774768}
    775769
     
    811805 * Create a fixed length string hash of the given str
    812806 ** ------------------------------------------------------------------------------------------------------------- */
    813 std::string Kernel::getStringHash(const llvm::StringRef str) {
     807std::string Kernel::getStringHash(const StringRef str) {
    814808
    815809    uint32_t digest[5]; // 160 bits in total
     
    890884 * @brief getScalarField
    891885 ** ------------------------------------------------------------------------------------------------------------- */
    892 const Kernel::ScalarField & Kernel::getScalarField(const llvm::StringRef name) const {
     886const Kernel::ScalarField & Kernel::getScalarField(const StringRef name) const {
    893887    assert (!mScalarMap.empty());
    894888    const auto f = mScalarMap.find(name);
     
    901895
    902896/** ------------------------------------------------------------------------------------------------------------- *
     897 * @brief getScalarFieldPtr
     898 ** ------------------------------------------------------------------------------------------------------------- */
     899Value * Kernel::getScalarFieldPtr(KernelBuilder & b, const StringRef name) const {
     900    const auto & field = getScalarField(name);
     901    assert (mKernelStateType);
     902    unsigned index = field.Index;
     903    switch (field.Type) {
     904        case ScalarType::Local:
     905            return mLocalScalarPtr[index];
     906            case ScalarType::Internal:
     907            index += mOutputScalars.size();
     908        case ScalarType::Output:
     909            index += mInputScalars.size();
     910        case ScalarType::Input:
     911            break;
     912    }
     913    assert (index < mKernelStateType->getStructNumElements());
     914    return b.CreateGEP(getHandle(), {b.getInt32(0), b.getInt32(index)});
     915}
     916
     917/** ------------------------------------------------------------------------------------------------------------- *
     918 * @brief initializeLocalScalarValues
     919 ** ------------------------------------------------------------------------------------------------------------- */
     920void Kernel::initializeLocalScalarValues(const std::unique_ptr<KernelBuilder> & b) {
     921    if (LLVM_LIKELY(mLocalScalars.empty())) {
     922        return;
     923    }
     924    mLocalScalarPtr.resize(mLocalScalars.size());
     925    const auto end = mScalarMap.end();
     926    for (auto i = mScalarMap.begin(); i != end; ++i) {
     927        ScalarField & field = i->getValue();
     928        if (LLVM_UNLIKELY(field.Type == ScalarType::Local)) {
     929            const auto index = field.Index;
     930            const Binding & local = mLocalScalars[index];
     931            Value * const scalar = b->CreateAlloca(local.getType());
     932            b->CreateStore(ConstantAggregateZero::get(local.getType()), scalar);
     933            mLocalScalarPtr[index] = scalar;
     934        }
     935    }
     936}
     937
     938/** ------------------------------------------------------------------------------------------------------------- *
     939 * @brief isStateless
     940 ** ------------------------------------------------------------------------------------------------------------- */
     941bool Kernel::isStateless() const {
     942    #warning return whether the kernel struct is zero-length; move cycle count to pipeline first.
     943    return false;
     944}
     945
     946/** ------------------------------------------------------------------------------------------------------------- *
    903947 * @brief getInputScalarBinding
    904948 ** ------------------------------------------------------------------------------------------------------------- */
    905 Binding & Kernel::getInputScalarBinding(const llvm::StringRef name) {
     949Binding & Kernel::getInputScalarBinding(const StringRef name) {
    906950    const ScalarField & field = getScalarField(name);
    907     if (LLVM_UNLIKELY(field.type != ScalarType::Input)) {
     951    if (LLVM_UNLIKELY(field.Type != ScalarType::Input)) {
    908952        report_fatal_error(getName() + "." + name + "is not an input scalar");
    909953    }
    910     return mInputScalars[field.index];
     954    return mInputScalars[field.Index];
    911955}
    912956
     
    914958 * @brief getOutputScalarBinding
    915959 ** ------------------------------------------------------------------------------------------------------------- */
    916 Binding & Kernel::getOutputScalarBinding(const llvm::StringRef name) {
     960Binding & Kernel::getOutputScalarBinding(const StringRef name) {
    917961    const ScalarField & field = getScalarField(name);
    918     if (LLVM_UNLIKELY(field.type != ScalarType::Output)) {
     962    if (LLVM_UNLIKELY(field.Type != ScalarType::Output)) {
    919963        report_fatal_error(getName() + "." + name + "is not an output scalar");
    920964    }
    921     return mOutputScalars[field.index];
     965    return mOutputScalars[field.Index];
    922966}
    923967
     
    925969 * @brief getStreamPort
    926970 ** ------------------------------------------------------------------------------------------------------------- */
    927 Kernel::StreamSetPort Kernel::getStreamPort(const llvm::StringRef name) const {
     971Kernel::StreamSetPort Kernel::getStreamPort(const StringRef name) const {
    928972    const auto f = mStreamSetMap.find(name);
    929973    if (LLVM_UNLIKELY(f == mStreamSetMap.end())) {
     
    937981 * @brief getStreamBinding
    938982 ** ------------------------------------------------------------------------------------------------------------- */
    939 const Binding & Kernel::getStreamBinding(const llvm::StringRef name) const {
     983const Binding & Kernel::getStreamBinding(const StringRef name) const {
    940984    Port port; unsigned index;
    941985    std::tie(port, index) = getStreamPort(name);
     
    10911135
    10921136/** ------------------------------------------------------------------------------------------------------------- *
    1093  * @brief getPopCountRateItemCount
    1094  ** ------------------------------------------------------------------------------------------------------------- */
    1095 Value * Kernel::getPopCountRateItemCount(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate, Value * const strideIndex) {
    1096     assert (rate.isPopCount() || rate.isNegatedPopCount());
    1097     Port refPort;
    1098     unsigned refIndex = 0;
    1099     std::tie(refPort, refIndex) = getStreamPort(rate.getReference());
    1100     assert (refPort == Port::Input);
    1101     Value * array = nullptr;
    1102     if (rate.isNegatedPopCount()) {
    1103         array = mNegatedPopCountRateArray[refIndex];
    1104     } else {
    1105         array = mPopCountRateArray[refIndex];
    1106     }
    1107     assert (array && "missing pop count array attribute");
    1108     Value * const currentSum = b->CreateLoad(b->CreateGEP(array, strideIndex));
    1109     Value * const priorIndex = b->CreateSub(strideIndex, b->getSize(1));
    1110     Value * const priorSum = b->CreateLoad(b->CreateGEP(array, priorIndex));
    1111     return b->CreateSub(currentSum, priorSum);
    1112 }
    1113 
    1114 /** ------------------------------------------------------------------------------------------------------------- *
    11151137 * @brief generateKernelMethod
    11161138 ** ------------------------------------------------------------------------------------------------------------- */
     
    11381160std::string Kernel::getDefaultFamilyName() const {
    11391161    std::string tmp;
    1140     llvm::raw_string_ostream out(tmp);
     1162    raw_string_ostream out(tmp);
    11411163    out << "F";
    11421164    out << getStride();
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r6272 r6273  
    7070public:
    7171
    72     enum class ScalarType { Input, Output, Internal };
     72    enum class ScalarType { Input, Output, Internal, Local };
    7373
    7474    struct ScalarField {
    75         ScalarType    type;
    76         unsigned      index;
    77         ScalarField(const ScalarType type, const unsigned index) : type(type), index(index) { }
     75        ScalarType    Type;
     76        unsigned      Index;
     77
     78        ScalarField(const ScalarType type, const unsigned index)
     79        : Type(type), Index(index) {
     80
     81        }
    7882        constexpr ScalarField(const ScalarField & other) = default;
    7983        ScalarField & operator=(ScalarField && other) = default;
     
    145149    virtual bool isCachable() const { return false; }
    146150
     151    bool isStateless() const;
     152
    147153    unsigned getStride() const { return mStride; }
    148154
     
    270276    void setInputScalar(const llvm::StringRef name, Scalar * value) {
    271277        const auto & field = getScalarField(name);
    272         assert(field.type == ScalarType::Input);
    273         setInputScalarAt(field.index, value);
     278        assert(field.Type == ScalarType::Input);
     279        setInputScalarAt(field.Index, value);
    274280    }
    275281
     
    303309    void setOutputScalar(const llvm::StringRef name, Scalar * value) {
    304310        const auto & field = getScalarField(name);
    305         assert(field.type == ScalarType::Output);
    306         setOutputScalarAt(field.index, value);
     311        assert(field.Type == ScalarType::Output);
     312        setOutputScalarAt(field.Index, value);
    307313    }
    308314
     
    312318
    313319    void addInternalScalar(llvm::Type * type, const llvm::StringRef name);
     320
     321    void addLocalScalar(llvm::Type * type, const llvm::StringRef name);
    314322
    315323    llvm::Value * getHandle() const {
     
    388396    LLVM_READNONE std::string getDefaultFamilyName() const;
    389397
    390     LLVM_READNONE unsigned getScalarIndex(const llvm::StringRef name) const;
    391 
    392398    virtual void addInternalKernelProperties(const std::unique_ptr<KernelBuilder> &) { }
    393399
     
    443449        return mAvailableInputItems[index];
    444450    }
    445 
    446     llvm::Value * getPopCountRateItemCount(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate, llvm::Value * const strideIndex);
    447451
    448452    LLVM_READNONE bool canSetTerminateSignal() const {
     
    488492private:
    489493
     494    void initializeLocalScalarValues(const std::unique_ptr<KernelBuilder> & b);
     495
    490496    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b);
    491497
     
    499505
    500506    LLVM_READNONE const ScalarField & getScalarField(const llvm::StringRef name) const;
     507
     508    llvm::Value * getScalarFieldPtr(KernelBuilder & b, const llvm::StringRef name) const;
    501509
    502510    void addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & b);
     
    518526    Bindings                        mInputStreamSets;
    519527    Bindings                        mOutputStreamSets;
     528
    520529    Bindings                        mInputScalars;
    521530    Bindings                        mOutputScalars;
    522531    Bindings                        mInternalScalars;
     532    Bindings                        mLocalScalars;
    523533
    524534    llvm::Function *                mCurrentMethod;
     
    528538    llvm::Value *                   mIsFinal;
    529539    llvm::Value *                   mNumOfStrides;
     540
     541    std::vector<llvm::Value *>      mLocalScalarPtr;
    530542
    531543    std::vector<llvm::Value *>      mProcessedInputItemPtr;
     
    659671    void incrementCountableItemCounts(const std::unique_ptr<KernelBuilder> & b);
    660672
     673    llvm::Value * getPopCountRateItemCount(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate);
     674
    661675    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) final;
    662676
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r6258 r6273  
    1616using Port = Kernel::Port;
    1717
    18 inline Value * KernelBuilder::getScalarFieldPtr(Value * const handle, Value * const index) {
    19     assert ("handle cannot be null" && handle);
    20     assert ("index cannot be null" && index);
    21     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    22         CreateAssert(handle, "getScalarFieldPtr: handle cannot be null!");
    23     }
    24     #ifndef NDEBUG
    25     const Function * const handleFunction = isa<Argument>(handle) ? cast<Argument>(handle)->getParent() : cast<Instruction>(handle)->getParent()->getParent();
    26     const Function * const builderFunction = GetInsertBlock()->getParent();
    27     assert ("handle is not from the current function." && handleFunction == builderFunction);
    28     #endif
    29     return CreateGEP(handle, {getInt32(0), index});
    30 }
    31 
    32 inline Value * KernelBuilder::getScalarFieldPtr(Value * const handle, const std::string & fieldName) {
    33     ConstantInt * const index = getInt32(mKernel->getScalarIndex(fieldName));
    34     return getScalarFieldPtr(handle, index);
    35 }
    36 
    37 Value * KernelBuilder::getScalarFieldPtr(Value * const index) {
    38     return getScalarFieldPtr(mKernel->getHandle(), index);
    39 }
    40 
    4118Value * KernelBuilder::getScalarFieldPtr(const StringRef fieldName) {
    42     return getScalarFieldPtr(mKernel->getHandle(), fieldName);
     19    return mKernel->getScalarFieldPtr(*this, fieldName);
    4320}
    4421
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r6258 r6273  
    156156    const unsigned mStride;
    157157
    158     llvm::Value * getScalarFieldPtr(llvm::Value * handle, llvm::Value * index);
    159 
    160     llvm::Value * getScalarFieldPtr(llvm::Value * instance, const std::string & fieldName);
    161 
    162158    std::string getKernelName() const final;
    163159
  • icGREP/icgrep-devel/icgrep/kernels/optimizationbranch.cpp

    r6272 r6273  
    3838
    3939/** ------------------------------------------------------------------------------------------------------------- *
    40  * @brief isParamAddressable
    41  ** ------------------------------------------------------------------------------------------------------------- */
    42 inline bool isParamAddressable(const Binding & binding) {
     40 * @brief isParamConstant
     41 ** ------------------------------------------------------------------------------------------------------------- */
     42inline bool isParamConstant(const Binding & binding) {
    4343    if (binding.isDeferred()) {
    44         return true;
    45     }
    46     const ProcessingRate & rate = binding.getRate();
    47     return (rate.isBounded() || rate.isUnknown());
    48 }
    49 
    50 /** ------------------------------------------------------------------------------------------------------------- *
    51  * @brief isParamConstant
    52  ** ------------------------------------------------------------------------------------------------------------- */
    53 inline bool isParamConstant(const Binding & binding) {
    54     assert (!binding.isDeferred());
     44        return false;
     45    }
    5546    const ProcessingRate & rate = binding.getRate();
    5647    return rate.isFixed() || rate.isPopCount() || rate.isNegatedPopCount();
     
    5849
    5950/** ------------------------------------------------------------------------------------------------------------- *
    60  * @brief hasParam
    61  ** ------------------------------------------------------------------------------------------------------------- */
    62 inline bool hasParam(const Binding & binding) {
    63     return !binding.getRate().isRelative();
    64 }
    65 
    66 /** ------------------------------------------------------------------------------------------------------------- *
    67  * @brief callKernel
    68  ** ------------------------------------------------------------------------------------------------------------- */
    69 void OptimizationBranch::callKernel(const std::unique_ptr<KernelBuilder> & b,
    70                                     const Kernel * const kernel,
    71                                     Value * const first, Value * const last,
    72                                     PHINode * const terminatedPhi) {
    73 #if 0
    74     std::vector<Value *> args;
    75     args.reserve(mCurrentMethod->arg_size());
    76     args.push_back(kernel->getHandle()); // handle
    77     args.push_back(b->CreateSub(last, first)); // numOfStrides
    78     const auto numOfInputs = kernel->getNumOfStreamInputs();
    79     for (unsigned i = 0; i < numOfInputs; i++) {
    80 
    81         const Binding & input = kernel->getInputStreamSetBinding(i);
    82         const StreamSetBuffer * const buffer = mStreamSetInputBuffers[i];
    83         // logical base input address
    84         args.push_back(buffer->getBaseAddress(b.get()));
    85         // processed input items
    86         args.push_back(mProcessedInputItems[i]);
    87         // accessible input items (after non-deferred processed item count)
    88         args.push_back();
    89         // TODO: What if one of the branches requires this but the other doesn't?
    90         if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
    91             args.push_back(b->CreateGEP(mPopCountRateArray[i], first));
    92         }
    93         if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
    94             args.push_back(b->CreateGEP(mNegatedPopCountRateArray[i], first));
    95         }
    96     }
    97 
    98     const auto numOfOutputs = kernel->getNumOfStreamOutputs();
    99     for (unsigned i = 0; i < numOfOutputs; ++i) {
    100         const Binding & output = kernel->getOutputStreamSetBinding(i);
    101         const auto unmanaged = !output.hasAttribute(AttrId::ManagedBuffer);
    102         if (unmanaged) {
    103             const StreamSetBuffer * const buffer = mStreamSetOutputBuffers[i];
    104             args.push_back(buffer->getBaseAddress(b.get()));
    105         }
    106         args.push_back(mProducedOutputItems[i]);
    107         if (unmanaged) {
    108             // writable
    109         } else {
    110             // consumed
    111         }
    112     }
    113 
    114     Value * terminated = b->CreateCall(kernel->getDoSegmentFunction(b->getModule()), args);
    115     if (terminatedPhi) {
    116         if (LLVM_UNLIKELY(kernel->canSetTerminateSignal())) {
    117             terminated = b->getFalse();
    118         }
    119         terminatedPhi->addIncoming(terminated, b->GetInsertBlock());
    120     }
    121 #endif
     51 * @brief isLocalBuffer
     52 ** ------------------------------------------------------------------------------------------------------------- */
     53inline bool isLocalBuffer(const Binding & output) {
     54    return output.getRate().isUnknown() || output.hasAttribute(AttrId::ManagedBuffer);
    12255}
    12356
     
    12659 ** ------------------------------------------------------------------------------------------------------------- */
    12760void OptimizationBranch::generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
    128 #if 0
    12961
    13062    BasicBlock * const loopCond = b->CreateBasicBlock("cond");
     
    13971    const auto numOfConditionInputs = isa<StreamSet>(mCondition) ? 1 : 0;
    14072    const auto numOfInputs = getNumOfStreamInputs() - numOfConditionInputs;
    141     std::vector<Value *> initialInputItems(numOfInputs, nullptr);
    142 
    143     mPartialAccessibleInputItems.resize(numOfInputs);
    144 
     73    std::vector<llvm::Value *> initialProcessedInputItems(numOfInputs, nullptr);
    14574    for (unsigned i = 0; i < numOfInputs; ++i) {
    146         if (isParamAddressable(mInputStreamSets[i])) {
    147             initialInputItems[i] = b->CreateLoad(mProcessedInputItemPtr[i]);
    148         }
    149         mPartialAccessibleInputItems[i] = mAccessibleInputItems[i];
     75        if (isParamConstant(mInputStreamSets[i])) {
     76            initialProcessedInputItems[i] = b->CreateLoad(mProcessedInputItemPtr[i]);
     77        }
    15078    }
    15179
    15280    const auto numOfOutputs = getNumOfStreamOutputs();
    153     std::vector<Value *> initialOutputItems(numOfOutputs, nullptr);
     81    std::vector<llvm::Value *> initialProducedOutputItems(numOfOutputs, nullptr);
    15482    for (unsigned i = 0; i < numOfOutputs; ++i) {
    155         if (isParamAddressable(mOutputStreamSets[i])) {
    156             initialOutputItems[i] = b->CreateLoad(mProducedOutputItemPtr[i]);
    157         }
    158         mPartialAccessibleInputItems[i] = mAccessibleInputItems[i];
     83        if (isParamConstant(mOutputStreamSets[i])) {
     84            initialProducedOutputItems[i] = b->CreateLoad(mProducedOutputItemPtr[i]);
     85        }
    15986    }
    16087
     
    16592    if (canSetTerminateSignal()) {
    16693        b->SetInsertPoint(mergePaths);
    167         terminatedPhi = b->CreatePHINode(b->getInt1Ty(), 2);
     94        terminatedPhi = b->CreatePHI(b->getInt1Ty(), 2);
    16895    }
    16996
     
    173100    first->addIncoming(ZERO, entry);
    174101    PHINode * const last = b->CreatePHI(sizeTy, 3);
     102    PHINode * const state = b->CreatePHI(b->getInt1Ty(), 3);
     103    state->addIncoming(b->getFalse(), entry);
    175104
    176105    mProcessedInputItems.resize(numOfInputs);
     106    mAccessibleInputItemPhi.resize(numOfInputs);
    177107    for (unsigned i = 0; i < numOfInputs; ++i) {
    178         if (initialInputItems[i]) {
    179             PHINode * const inputPhi = b->CreatePHI(sizeTy, 3);
    180             inputPhi->addIncoming(initialInputItems[i], entry);
     108        if (initialProcessedInputItems[i]) {
     109            PHINode * const inputPhi = b->CreatePHI(sizeTy, 2);
     110            inputPhi->addIncoming(initialProcessedInputItems[i], entry);
    181111            mProcessedInputItems[i] = inputPhi;
    182112        } else {
    183113            mProcessedInputItems[i] = mProcessedInputItemPtr[i];
    184114        }
     115        PHINode * const accessiblePhi = b->CreatePHI(sizeTy, 2);
     116        accessiblePhi->addIncoming(mAccessibleInputItems[i], entry);
     117        mAccessibleInputItemPhi[i] = accessiblePhi;
    185118    }
    186119
    187120    mProducedOutputItems.resize(numOfOutputs);
     121    mWritableOrConsumedOutputItemPhi.resize(numOfOutputs);
    188122    for (unsigned i = 0; i < numOfOutputs; ++i) {
    189         if (initialOutputItems[i]) {
    190             PHINode * const outputPhi = b->CreatePHI(sizeTy, 3);
    191             outputPhi->addIncoming(initialOutputItems[i], entry);
     123        if (initialProducedOutputItems[i]) {
     124            PHINode * const outputPhi = b->CreatePHI(sizeTy, 2);
     125            outputPhi->addIncoming(initialProducedOutputItems[i], entry);
    192126            mProducedOutputItems[i] = outputPhi;
    193127        } else {
    194128            mProducedOutputItems[i] = mProducedOutputItemPtr[i];
    195129        }
     130        PHINode * const writablePhi = b->CreatePHI(sizeTy, 2);
     131        if (isLocalBuffer(mOutputStreamSets[i])) {
     132            writablePhi->addIncoming(mConsumedOutputItems[i], entry);
     133        } else {
     134            writablePhi->addIncoming(mWritableOutputItems[i], entry);
     135        }
     136        mWritableOrConsumedOutputItemPhi[i] = writablePhi;
    196137    }
    197138
     
    199140
    200141        last->addIncoming(ZERO, entry);
    201 
    202         PHINode * const state = b->CreatePHI(b->getInt1Ty(), 3);
    203         state->addIncoming(b->getFalse(), entry);
    204142
    205143        BasicBlock * const summarizeOneStride = b->CreateBasicBlock("summarizeOneStride", nonZeroPath);
     
    211149        Value * const streamCount = b->getInputStreamSetCount(CONDITION_TAG);
    212150        Value * const blocksPerStride = b->CreateMul(streamCount, strideCount);
    213 
    214151
    215152        Value * const offset = b->CreateMul(last, strideCount);
     
    247184        first->addIncoming(first, checkStride);
    248185        state->addIncoming(nextState, checkStride);
     186        for (unsigned i = 0; i < numOfInputs; ++i) {
     187            if (initialProcessedInputItems[i]) {
     188                PHINode * const inputPhi = cast<PHINode>(mProcessedInputItems[i]);
     189                inputPhi->addIncoming(inputPhi, checkStride);
     190            }
     191            PHINode * const accessiblePhi = mAccessibleInputItemPhi[i];
     192            accessiblePhi->addIncoming(accessiblePhi, checkStride);
     193        }
     194
     195        for (unsigned i = 0; i < numOfOutputs; ++i) {
     196            if (initialProducedOutputItems[i]) {
     197                PHINode * const outputPhi = cast<PHINode>(mProducedOutputItems[i]);
     198                outputPhi->addIncoming(outputPhi, checkStride);
     199            }
     200            PHINode * const writablePhi = mWritableOrConsumedOutputItemPhi[i];
     201            writablePhi->addIncoming(writablePhi, checkStride);
     202        }
     203
    249204        b->CreateLikelyCondBr(checkNextStride, loopCond, processStrides);
    250205
     
    271226    b->CreateBr(mergePaths);
    272227
    273 
    274228    b->SetInsertPoint(mergePaths);
    275 
    276 #endif
     229    last->addIncoming(last, mergePaths);
     230    first->addIncoming(last, mergePaths);
     231    state->addIncoming(b->getFalse(), mergePaths);
     232    for (unsigned i = 0; i < numOfInputs; ++i) {
     233        const Binding & input = mInputStreamSets[i];
     234        Value * updatedInputCount = nullptr;
     235        if (isParamConstant(input)) {
     236            Value * const itemCount = getItemCountIncrement(b, input, first, last);
     237            PHINode * const inputPhi = cast<PHINode>(mProcessedInputItems[i]);
     238            updatedInputCount = b->CreateAdd(inputPhi, itemCount);
     239            inputPhi->addIncoming(updatedInputCount, mergePaths);
     240        }
     241        PHINode * const accessiblePhi = mAccessibleInputItemPhi[i];
     242        if (updatedInputCount == nullptr) {
     243            updatedInputCount = b->CreateLoad(mProducedOutputItems[i]);
     244        }
     245        Value * const remaining = b->CreateSub(accessiblePhi, updatedInputCount);
     246        accessiblePhi->addIncoming(remaining, mergePaths);
     247    }
     248
     249    for (unsigned i = 0; i < numOfOutputs; ++i) {
     250        const Binding & output = mOutputStreamSets[i];
     251        Value * updatedOutputCount = nullptr;
     252        if (isParamConstant(output)) {
     253            Value * const itemCount = getItemCountIncrement(b, output, first, last);
     254            PHINode * const outputPhi = cast<PHINode>(mProducedOutputItems[i]);
     255            updatedOutputCount = b->CreateAdd(outputPhi, itemCount);
     256            outputPhi->addIncoming(updatedOutputCount, mergePaths);
     257        }
     258        PHINode * const writablePhi = mWritableOrConsumedOutputItemPhi[i];
     259        if (isLocalBuffer(output)) {
     260            writablePhi->addIncoming(writablePhi, mergePaths);
     261        } else {
     262            if (updatedOutputCount == nullptr) {
     263                updatedOutputCount = b->CreateLoad(mProducedOutputItems[i]);
     264            }
     265            Value * const remaining = b->CreateSub(writablePhi, updatedOutputCount);
     266            writablePhi->addIncoming(remaining, mergePaths);
     267        }
     268    }
     269
     270    Value * const lastStride = b->CreateICmpNE(last, mNumOfStrides);
     271    Value * const finished = b->CreateOr(lastStride, terminatedPhi);
     272    b->CreateLikelyCondBr(finished, exit, loopCond);
     273
     274    b->SetInsertPoint(exit);
     275
     276}
     277
     278/** ------------------------------------------------------------------------------------------------------------- *
     279 * @brief callKernel
         *
         * Emits a call to the do-segment function of the given branch kernel for the stride range [first, last).
         *
         * The argument list is assembled in this order: the kernel handle, the number of strides (last - first), then
         * for each input stream set its buffer base address, processed item count and accessible item count (plus a
         * pointer into the popcount / negated popcount array when the binding carries the matching attribute), and
         * finally for each output stream set its buffer base address (omitted for local buffers), produced item count
         * and writable-or-consumed item count.
         *
         * @param b              builder used to emit the IR
         * @param kernel         the kernel whose do-segment function is called
         * @param first          index of the first stride to process
         * @param last           index one past the final stride to process (numOfStrides is last - first)
         * @param terminatedPhi  may be null; when present it receives the termination value for the current block
     280 ** ------------------------------------------------------------------------------------------------------------- */
     281void OptimizationBranch::callKernel(const std::unique_ptr<KernelBuilder> & b,
     282                                    const Kernel * const kernel,
     283                                    Value * const first, Value * const last,
     284                                    PHINode * const terminatedPhi) {
     285
     286    std::vector<Value *> args;
            // NOTE(review): capacity is taken from mCurrentMethod (the enclosing method being generated), not from
            // the callee's signature; presumably the two have comparable arity - confirm. It is only a reserve hint.
     287    args.reserve(mCurrentMethod->arg_size());
     288    args.push_back(kernel->getHandle()); // handle
     289    args.push_back(b->CreateSub(last, first)); // numOfStrides
     290    const auto numOfInputs = kernel->getNumOfStreamInputs();
     291    for (unsigned i = 0; i < numOfInputs; i++) {
     292
                // The binding is read from the callee kernel while the buffers and item counts below are indexed
                // in this object's own vectors.
                // NOTE(review): assumes port i of the callee corresponds to port i of this kernel - confirm.
     293        const Binding & input = kernel->getInputStreamSetBinding(i);
     294        const auto & buffer = mStreamSetInputBuffers[i];
     295        // logical base input address
     296        args.push_back(buffer->getBaseAddress(b.get()));
     297        // processed input items
     298        args.push_back(mProcessedInputItems[i]);
     299        // accessible input items (after non-deferred processed item count)
     300        args.push_back(mAccessibleInputItemPhi[i]);
     301        // TODO: What if one of the branches requires this but the other doesn't?
                // The popcount arrays are passed as a pointer to the element at index `first`.
     302        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
     303            args.push_back(b->CreateGEP(mPopCountRateArray[i], first));
     304        }
     305        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
     306            args.push_back(b->CreateGEP(mNegatedPopCountRateArray[i], first));
     307        }
     308    }
     309
     310    const auto numOfOutputs = kernel->getNumOfStreamOutputs();
     311    for (unsigned i = 0; i < numOfOutputs; ++i) {
     312        const Binding & output = kernel->getOutputStreamSetBinding(i);
                // No base address argument is pushed for a local output buffer.
     313        if (!isLocalBuffer(output)) {
     314            const auto & buffer = mStreamSetOutputBuffers[i];
     315            args.push_back(buffer->getBaseAddress(b.get()));
     316        }
     317        args.push_back(mProducedOutputItems[i]);
     318        args.push_back(mWritableOrConsumedOutputItemPhi[i]);
     319    }
     320
     321    Value * terminated = b->CreateCall(kernel->getDoSegmentFunction(b->getModule()), args);
            // Only record a termination value when the caller supplied a PHI for it. If the callee cannot set the
            // terminate signal, the call result is ignored and a constant false is recorded instead.
     322    if (terminatedPhi) {
     323        if (!kernel->canSetTerminateSignal()) {
     324            terminated = b->getFalse();
     325        }
     326        terminatedPhi->addIncoming(terminated, b->GetInsertBlock());
     327    }
     328
     329}
     330
     331/** ------------------------------------------------------------------------------------------------------------- *
     332 * @brief getItemCountIncrement
         *
         * Emits IR computing how many items the given binding advances by over the stride range [first, last).
         *
         * For a fixed rate this is (last - first) * ceiling(getUpperBound(binding) * getStride()). Otherwise the rate
         * must be a popcount or negated popcount rate (asserted), and the result is the difference between the
         * entries at indices (last - 1) and (first - 1) of the popcount array belonging to the rate's reference input.
         *
         * @param b        builder used to emit the IR
         * @param binding  the stream set binding whose rate is examined
         * @param first    index of the first stride in the range
         * @param last     index one past the final stride in the range
         * @return the emitted value holding the item count increment
     333 ** ------------------------------------------------------------------------------------------------------------- */
     334Value * OptimizationBranch::getItemCountIncrement(const std::unique_ptr<KernelBuilder> & b, const Binding & binding,
     335                                                  Value * const first, Value * const last) const {
     336
     337    const ProcessingRate & rate = binding.getRate();
     338    if (rate.isFixed()) {
                // The per-stride item count is a compile-time constant; scale it by the number of strides.
     339        Constant * const strideLength = b->getSize(ceiling(getUpperBound(binding) * getStride()));
     340        Value * const numOfStrides = b->CreateSub(last, first);
     341        return b->CreateMul(numOfStrides, strideLength);
     342    } else { assert (rate.isPopCount() || rate.isNegatedPopCount());
                // Resolve the stream that this rate refers to; it is asserted to be an input port.
     343        Port refPort;
     344        unsigned refIndex = 0;
     345        std::tie(refPort, refIndex) = getStreamPort(rate.getReference());
     346        assert (refPort == Port::Input);
     347        Value * array = nullptr;
     348        if (rate.isNegatedPopCount()) {
     349            array = mNegatedPopCountRateArray[refIndex];
     350        } else {
     351            array = mPopCountRateArray[refIndex];
     352        }
                // NOTE(review): the names currentSum/priorSum suggest the array holds running totals per stride, so
                // the subtraction yields the count for [first, last) - confirm against the code that fills the array.
                // NOTE(review): when first is zero, priorIndex is (0 - 1) and the GEP addresses the element before
                // index 0. Presumably the array has a leading entry there or callers never pass first == 0 - confirm.
     353        Constant * const ONE = b->getSize(1);
     354        Value * const currentIndex = b->CreateSub(last, ONE);
     355        Value * const currentSum = b->CreateLoad(b->CreateGEP(array, currentIndex));
     356        Value * const priorIndex = b->CreateSub(first, ONE);
     357        Value * const priorSum = b->CreateLoad(b->CreateGEP(array, priorIndex));
     358        return b->CreateSub(currentSum, priorSum);
     359    }
     360
     277361}
    278362
  • icGREP/icgrep-devel/icgrep/kernels/optimizationbranch.h

    r6266 r6273  
    4848private:
    4949
     50    llvm::Value * getItemCountIncrement(const std::unique_ptr<KernelBuilder> & b, const Binding & binding,
     51                                        llvm::Value * const first, llvm::Value * const last) const;
     52
    5053    void callKernel(const std::unique_ptr<KernelBuilder> & b,
    5154                    const Kernel * const kernel, llvm::Value * const first, llvm::Value * const last,
     
    5457private:
    5558
    56     Relationship * const        mCondition;
    57     Kernel * const              mTrueKernel;
    58     Kernel * const              mFalseKernel;
    59     std::vector<llvm::Value *>  mProcessedInputItems;
    60     std::vector<llvm::Value *>  mPartialAccessibleInputItems;
     59    Relationship * const         mCondition;
     60    Kernel * const               mTrueKernel;
     61    Kernel * const               mFalseKernel;
    6162
    62     std::vector<llvm::Value *>  mProducedOutputItems;
     63    std::vector<llvm::Value *>   mProcessedInputItems;
     64    std::vector<llvm::PHINode *> mAccessibleInputItemPhi;
     65
     66    std::vector<llvm::Value *>   mProducedOutputItems;
     67    std::vector<llvm::PHINode *> mWritableOrConsumedOutputItemPhi;
    6368};
    6469
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_builder.cpp

    r6272 r6273  
    280280    }
    281281
    282     #ifndef NDEBUG
    283     std::vector<unsigned> index(numOfKernels + numOfCalls, std::numeric_limits<unsigned>::max());
    284     #else
    285282    std::vector<unsigned> index(numOfKernels + numOfCalls);
    286     #endif
    287283
    288284    unsigned j = 0;
     
    406402    raw_string_ostream out(name);
    407403
    408     out << "OptimizationBranch:";
     404    out << "OB:";
    409405
    410406    mCondition->getType()->print(out);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_logic.hpp

    r6266 r6273  
    235235    b->restoreIP(resumePoint);
    236236    setThreadState(b, processState);
    237 
    238237}
    239238
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/popcount_logic.hpp

    r6272 r6273  
    464464    Value * const baseOffset = b->CreateLoad(b->CreateGEP(mPopCountState, indices));
    465465    Value * const processed = mAlreadyProcessedPhi[inputPort];
    466     Constant * const LOG2_COUNT_WIDTH = getLog2BlockWidth(b);
    467     Value * const processedOffset = b->CreateLShr(processed, LOG2_COUNT_WIDTH);
     466    Value * const processedOffset = b->CreateLShr(processed, getLog2BlockWidth(b));
    468467    Value * const offset = b->CreateSub(processedOffset, baseOffset);
    469468    return b->CreateGEP(array, offset);
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r6261 r6273  
    257257: BlockOrientedKernel(b, annotateKernelNameWithPabloDebugFlags(std::move(kernelName)),
    258258                      std::move(stream_inputs), std::move(stream_outputs),
    259                       std::move(scalar_parameters), std::move(scalar_outputs),
    260                       {Binding{b->getBitBlockType(), "EOFbit"}, Binding{b->getBitBlockType(), "EOFmask"}})
     259                      std::move(scalar_parameters), std::move(scalar_outputs), {})
    261260, PabloAST(PabloAST::ClassTypeId::Kernel, nullptr, mAllocator)
    262261, mPabloCompiler()
     
    266265, mStreamTy(nullptr)
    267266, mContext(nullptr) {
    268 
     267    addLocalScalar(b->getBitBlockType(), "EOFbit");
     268    addLocalScalar(b->getBitBlockType(), "EOFmask");
    269269}
    270270
Note: See TracChangeset for help on using the changeset viewer.