Ignore:
Timestamp:
Nov 2, 2018, 7:18:31 PM (7 months ago)
Author:
nmedfort
Message:

Initial version of PipelineKernel + revised StreamSet model.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r6047 r6184  
    66#include "kernel.h"
    77#include <toolchain/toolchain.h>
     8#include <toolchain/driver.h>
     9#include <kernels/relationship.h>
    810#include <kernels/streamset.h>
     11#include <kernels/kernel_builder.h>
     12#include <llvm/IR/CallingConv.h>
     13#include <llvm/IR/DerivedTypes.h>
    914#include <llvm/IR/Constants.h>
    1015#include <llvm/IR/Function.h>
     
    1924#endif
    2025#include <llvm/Transforms/Utils/Local.h>
    21 #include <kernels/streamset.h>
     26#include <llvm/Support/Debug.h>
     27#include <boost/uuid/sha1.hpp>
     28#include <llvm/Support/Format.h>
    2229#include <sstream>
    23 #include <kernels/kernel_builder.h>
    24 #include <llvm/Support/Debug.h>
     30
    2531
    2632using namespace llvm;
    27 using namespace parabix;
     33using namespace boost;
     34using boost::container::flat_set;
    2835
    2936namespace kernel {
    3037
    31 /** ------------------------------------------------------------------------------------------------------------- *
    32  * @brief addScalar
    33  ** ------------------------------------------------------------------------------------------------------------- */
    34 unsigned Kernel::addScalar(Type * const type, const std::string & name) {
    35     if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    36         report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
    37     }
    38     if (LLVM_UNLIKELY(mKernelFieldMap.count(name))) {
    39         report_fatal_error(getName() + " already contains scalar field " + name);
    40     }
    41     const auto index = mKernelFields.size();
    42     mKernelFieldMap.emplace(name, index);
    43     mKernelFields.push_back(type);
    44     return index;
    45 }
    46 
    47 /** ------------------------------------------------------------------------------------------------------------- *
    48  * @brief addUnnamedScalar
    49  ** ------------------------------------------------------------------------------------------------------------- */
    50 unsigned Kernel::addUnnamedScalar(Type * const type) {
    51     if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    52         report_fatal_error("Cannot add unnamed field  to " + getName() + " after kernel state finalized");
    53     }
    54     const auto index = mKernelFields.size();
    55     mKernelFields.push_back(type);
    56     return index;
    57 }
    58 
    59 /** ------------------------------------------------------------------------------------------------------------- *
    60  * @brief bindPorts
    61  ** ------------------------------------------------------------------------------------------------------------- */
    62 void Kernel::bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    63 
    64     if (LLVM_UNLIKELY(mStreamSetInputs.size() != inputs.size())) {
    65         report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
    66                            " input stream sets but was given "
    67                            + std::to_string(inputs.size()));
    68     }
    69 
    70     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    71         mStreamMap.emplace(mStreamSetInputs[i].getName(), std::make_pair(Port::Input, i));
    72     }
    73 
    74     for (unsigned i = 0; i < inputs.size(); ++i) {
    75         StreamSetBuffer * const buf = inputs[i];
    76         if (LLVM_UNLIKELY(buf == nullptr)) {
    77             report_fatal_error(getName() + ": input stream " + std::to_string(i) + " cannot be null");
    78         }
    79        // const Binding & input = mStreamSetInputs[i];
    80        // verifyBufferSize(input, buf);
    81         buf->addConsumer(this);
    82     }
    83 
    84     if (LLVM_UNLIKELY(mStreamSetOutputs.size() != outputs.size())) {
    85         report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
    86                            + " output stream sets but was given "
    87                            + std::to_string(outputs.size()));
    88     }
    89 
    90     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    91         mStreamMap.emplace(mStreamSetOutputs[i].getName(), std::make_pair(Port::Output, i));
    92     }
    93 
    94     for (unsigned i = 0; i < outputs.size(); ++i) {
    95         StreamSetBuffer * const buf = outputs[i];
    96         if (LLVM_UNLIKELY(buf == nullptr)) {
    97             report_fatal_error(getName() + ": output stream set " + std::to_string(i) + " cannot be null");
    98         }
    99         const Binding & output = mStreamSetOutputs[i];
    100        // verifyBufferSize(output, buf);
    101         if (LLVM_LIKELY(buf->getProducer() == nullptr)) {
    102             buf->setProducer(this);
     38using AttrId = Attribute::KindId;
     39using RateValue = ProcessingRate::RateValue;
     40using RateId = ProcessingRate::KindId;
     41using StreamPort = Kernel::StreamSetPort;
     42using Port = Kernel::Port;
     43
     44// TODO: make "namespaced" internal scalars that are automatically grouped into cache-aligned structs
     45// within the kernel state to hide the complexity from the user?
     46
     47const static auto INIT_SUFFIX = "_Init";
     48const static auto DO_SEGMENT_SUFFIX = "_DoSegment";
     49const static auto TERMINATE_SUFFIX = "_Terminate";
     50
     51/** ------------------------------------------------------------------------------------------------------------- *
     52 * @brief setHandle
        *
        * Records handle as this kernel's state pointer (mHandle). The assertions require a non-null pointer whose
        * pointee type is mKernelStateType and, when NDEBUG is not defined, that the value (an Argument or an
        * Instruction) belongs to the function the builder is currently inserting into.
     53 ** ------------------------------------------------------------------------------------------------------------- */
     54void  Kernel::setHandle(const std::unique_ptr<KernelBuilder> & b, Value * const handle) {
     55    assert ("handle cannot be null!" && handle);
     56    assert ("handle must be a pointer!" && handle->getType()->isPointerTy());
     57    assert ("handle must be a kernel state object!" && (handle->getType()->getPointerElementType() == mKernelStateType));
     58    #ifndef NDEBUG
     59    const Function * const handleFunction = isa<Argument>(handle) ? cast<Argument>(handle)->getParent() : cast<Instruction>(handle)->getParent()->getParent();
     60    const Function * const builderFunction = b->GetInsertBlock()->getParent();
     61    assert ("handle is not from the current function." && (handleFunction == builderFunction));
     62    #endif
     63    mHandle = handle;
     64}
     65
     66/** ------------------------------------------------------------------------------------------------------------- *
     67 * @brief isLocalBuffer
        *
        * Returns true when the output binding has an unknown processing rate or carries the ManagedBuffer attribute.
        * The other functions in this file use the result to decide whether the buffer handle is kept in the kernel
        * state instead of being passed in as a do-segment argument.
     68 ** ------------------------------------------------------------------------------------------------------------- */
     69inline bool isLocalBuffer(const Binding & output) {
     70    return output.getRate().isUnknown() || output.hasAttribute(AttrId::ManagedBuffer);
     71}
     72
     73/** ------------------------------------------------------------------------------------------------------------- *
     74 * @brief addBaseKernelProperties
     75 *
     76 * Base kernel properties are those that the pipeline requires access to and must be in a fixed memory location.
        *
        * Steps, in order:
        *   1. default mStride to the builder's bit block width when it is still 0;
        *   2. rebuild mStreamSetInputBuffers / mStreamSetOutputBuffers with one ExternalBuffer per stream set binding;
        *   3. register the internal scalars: logical segment number, termination signal, a processed (input) or
        *      produced (output) item count per stream set, an extra non-deferred count for deferred bindings, and,
        *      for local outputs (see isLocalBuffer), the buffer handle, consumer set and consumed item count;
        *   4. register the 64-bit cycle counter scalar.
     77 ** ------------------------------------------------------------------------------------------------------------- */
     78void Kernel::addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & b) {
     79
     80    // Set the default kernel stride.
     81    if (mStride == 0) {
     82        mStride = b->getBitBlockWidth();
     83    }
     84
     85    // TODO: if a stream has an Expandable or ManagedBuffer attribute or is produced at an Unknown rate,
     86    // the pipeline ought to pass the stream as a DynamicBuffer. This will require some coordination between
     87    // the pipeline and kernel to ensure both have a consistent view of the buffer and that if either expands,
     88    // any other kernel that is (simultaneously) reading from the buffer is unaffected.
     89
     90    mStreamSetInputBuffers.clear();
     91    const auto numOfInputStreams = mInputStreamSets.size();
     92    mStreamSetInputBuffers.reserve(numOfInputStreams);
     93    for (unsigned i = 0; i < numOfInputStreams; ++i) {
     94        const Binding & input = mInputStreamSets[i];
     95        mStreamSetInputBuffers.emplace_back(new ExternalBuffer(b, input.getType()));
     96    }
     97
     98    mStreamSetOutputBuffers.clear();
     99    const auto numOfOutputStreams = mOutputStreamSets.size();
     100    mStreamSetOutputBuffers.reserve(numOfOutputStreams);
     101    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
     102        const Binding & output = mOutputStreamSets[i];
     103        mStreamSetOutputBuffers.emplace_back(new ExternalBuffer(b, output.getType()));
     104    }
     105
     106    IntegerType * const sizeTy = b->getSizeTy();
     107    PointerType * const sizePtrPtrTy = sizeTy->getPointerTo()->getPointerTo();
     108
     109    addInternalScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
     110    addInternalScalar(sizeTy, TERMINATION_SIGNAL);
     111
     112    // TODO: if we had a way of easily calculating the number of processed/produced items of the
     113    // final stride of a non-deferred fixed rate stream, we could avoid storing the item counts.
     114    for (unsigned i = 0; i < numOfInputStreams; ++i) {
     115        const Binding & input = mInputStreamSets[i];
     116        addInternalScalar(sizeTy, input.getName() + PROCESSED_ITEM_COUNT_SUFFIX);
     117        if (LLVM_UNLIKELY(input.isDeferred())) {
     118            addInternalScalar(sizeTy, input.getName() + NON_DEFERRED_ITEM_COUNT_SUFFIX);
     119        }
     120    }
     121
     122    // If an output is a managed buffer, we need to store both the buffer and a set of consumers.
          // The consumer set is a pointer to a { size, size** } struct.
     123    Type * const consumerSetTy = StructType::get(b->getContext(), {sizeTy, sizePtrPtrTy})->getPointerTo();
     124    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
     125        const Binding & output = mOutputStreamSets[i];
     126        addInternalScalar(sizeTy, output.getName() + PRODUCED_ITEM_COUNT_SUFFIX);
     127        if (LLVM_UNLIKELY(output.isDeferred())) {
     128            addInternalScalar(sizeTy, output.getName() + NON_DEFERRED_ITEM_COUNT_SUFFIX);
     129        }
     130        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
     131            Type * const handleTy = mStreamSetOutputBuffers[i]->getHandleType(b);
     132            addInternalScalar(handleTy, output.getName() + BUFFER_HANDLE_SUFFIX);
     133            addInternalScalar(consumerSetTy, output.getName() + CONSUMER_SUFFIX);
     134            addInternalScalar(sizeTy, output.getName() + CONSUMED_ITEM_COUNT_SUFFIX);
     135        }
     136    }
     137
     138    // We compile in a 64-bit CPU cycle counter into every kernel.   It will remain unused
     139    // in normal execution, but when codegen::EnableCycleCounter is specified, pipelines
     140    // will be able to add instrumentation to cached modules without recompilation.
     141    addInternalScalar(b->getInt64Ty(), CYCLECOUNT_SCALAR);
     142
     143}
     144
     145/** ------------------------------------------------------------------------------------------------------------- *
     146 * @brief addScalarToMap
         *
         * Registers name -> {scalarType, index} in mScalarMap. Registering the same name again with an identical type
         * and index is a no-op; registering it with a different type or index is reported as a fatal error.
     147 ** ------------------------------------------------------------------------------------------------------------- */
     148void Kernel::addScalarToMap(const std::string & name, const ScalarType scalarType, const unsigned index) {
     149    const auto r = mScalarMap.emplace(name, ScalarField{scalarType, index});
     150    if (LLVM_UNLIKELY(!r.second)) {
     151        const ScalarField & sf = r.first->second;
     152        if (LLVM_UNLIKELY(sf.type != scalarType || sf.index != index)) {
     153            report_fatal_error(getName() + " already contains scalar " + name);
     154        }
     155    }
     156}
     157
     158/** ------------------------------------------------------------------------------------------------------------- *
     159 * @brief addStreamToMap
         *
         * Registers name -> (port, index) in mStreamSetMap. Registering the same name again with an identical port and
         * index is a no-op; registering it with a different port or index is reported as a fatal error.
     160 ** ------------------------------------------------------------------------------------------------------------- */
     161void Kernel::addStreamToMap(const std::string & name, const Port port, const unsigned index) {
     162    const auto r = mStreamSetMap.emplace(name, std::make_pair(port, index));
     163    if (LLVM_UNLIKELY(!r.second)) {
     164        const StreamPort & sf = r.first->second;
     165        if (LLVM_UNLIKELY(sf.first != port || sf.second != index)) {
     166            report_fatal_error(getName() + " already contains stream " + name);
     167        }
     168    }
     169}
     170
     171/** ------------------------------------------------------------------------------------------------------------- *
     172 * @brief addKernelDeclarations
         *
         * Adds the initialize, do-segment and finalize function declarations and then links the external methods.
         * Throws std::runtime_error when mKernelStateType has not been set yet.
         * NOTE(review): addScalarToMap / addStreamToMap report errors via report_fatal_error rather than by throwing;
         * confirm whether the exception here is intentional.
     173 ** ------------------------------------------------------------------------------------------------------------- */
     174void Kernel::addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b) {
     175    if (mKernelStateType == nullptr) {
     176        throw std::runtime_error("Kernel state definition " + getName() + " has not been finalized.");
     177    }
     178    addInitializeDeclaration(b);
     179    addDoSegmentDeclaration(b);
     180    addFinalizeDeclaration(b);
     181    linkExternalMethods(b);
     182}
     183
     184/** ------------------------------------------------------------------------------------------------------------- *
     185 * @brief addInitializeDeclaration
         *
         * Declares the function named getName() + "_Init" in the builder's module. It returns void and takes the kernel
         * state pointer ("self") followed by one parameter per input scalar, named after the scalar binding. The
         * function has external linkage, the C calling convention and is marked as non-throwing.
     186 ** ------------------------------------------------------------------------------------------------------------- */
     187void Kernel::addInitializeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
     188
     189    std::vector<Type *> params;
     190    params.push_back(mKernelStateType->getPointerTo());
     191    for (const Binding & binding : mInputScalars) {
     192        params.push_back(binding.getType());
     193    }
     194
     195    FunctionType * const initType = FunctionType::get(b->getVoidTy(), params, false);
     196    Function * const initFunc = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, b->getModule());
     197    initFunc->setCallingConv(CallingConv::C);
     198    initFunc->setDoesNotThrow();
     199    auto args = initFunc->arg_begin();
     200    args->setName("self");
     201    for (const Binding & binding : mInputScalars) {
     202        (++args)->setName(binding.getName());
     203    }
     204
          // args points at the last named argument here, so the one after it must be the end.
     205    assert (std::next(args) == initFunc->arg_end());
     206}
     207
     208/** ------------------------------------------------------------------------------------------------------------- *
     209 * @brief callGenerateInitializeMethod
         *
         * Emits the body of the init function: creates the entry block, binds the first argument as the handle,
         * stores a zero aggregate into the kernel state, copies each remaining argument into the input scalar field of
         * the same name, points every local output buffer (see isLocalBuffer) at its handle field in the state, calls
         * generateInitializeMethod and returns void. When the EnableMProtect debug option is set, the state is
         * mprotect'ed WRITE before and READ after. The builder's previous kernel and this kernel's previous handle are
         * restored before returning.
     210 ** ------------------------------------------------------------------------------------------------------------- */
     211void Kernel::callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
     212    const Kernel * const storedKernel = b->getKernel();
     213    b->setKernel(this);
     214    Value * const storedHandle = getHandle();
     215    mCurrentMethod = getInitFunction(b->getModule());
     216    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
     217    auto args = mCurrentMethod->arg_begin();
     218    setHandle(b, &*args);
     219    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     220        b->CreateMProtect(mHandle, CBuilder::Protect::WRITE);
     221    }
     222    b->CreateStore(ConstantAggregateZero::get(mKernelStateType), getHandle());
     223    for (const auto & binding : mInputScalars) {
     224        b->setScalarField(binding.getName(), &*(++args));
     225    }
     226
     227    const auto numOfOutputs = mOutputStreamSets.size();
     228    for (unsigned i = 0; i < numOfOutputs; i++) {
     229        const Binding & output = mOutputStreamSets[i];
     230        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
     231            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
     232            mStreamSetOutputBuffers[i]->setHandle(b, handle);
     233        }
     234    }
     235    generateInitializeMethod(b);
     236    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     237        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
     238    }
     239    b->CreateRetVoid();
     240    b->setKernel(storedKernel);
     241    mHandle = storedHandle;
     242    mCurrentMethod = nullptr;
     243}
     244
     245/** ------------------------------------------------------------------------------------------------------------- *
     246 * @brief addDoSegmentDeclaration
         *
         * Declares the void function named getName() + "_DoSegment" in the builder's module (external linkage, C calling
         * convention, marked non-throwing). Parameter order:
         *   self, numOfStrides,
         *   for each input stream set:  <name> (buffer pointer), <name>_accessible,
         *                               then <name>_popCountArray and/or <name>_negatedPopCountArray when the binding
         *                               has the matching Requires*PopCountArray attribute,
         *   for each output stream set that is not local (see isLocalBuffer): <name> (buffer pointer), <name>_writable.
     247 ** ------------------------------------------------------------------------------------------------------------- */
     248void Kernel::addDoSegmentDeclaration(const std::unique_ptr<KernelBuilder> & b) {
     249
     250    IntegerType * const sizeTy = b->getSizeTy();
     251    PointerType * const sizePtrTy = sizeTy->getPointerTo();
     252    Type * const voidTy = b->getVoidTy();
     253
     254    std::vector<Type *> params;
          // NOTE(review): each stream set contributes at least two parameters, so this reservation is only a lower
          // bound on the final size; it is a capacity hint and does not affect correctness.
     255    params.reserve(2 + mInputStreamSets.size() + mOutputStreamSets.size());
     256    params.push_back(mKernelStateType->getPointerTo());  // self
     257    params.push_back(sizeTy); // numOfStrides
     258    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
     259        Type * const bufferType = mStreamSetInputBuffers[i]->getType();
     260        params.push_back(bufferType->getPointerTo()); // logical "base" input address
     261        params.push_back(sizeTy);  // accessible input items (after non-deferred processed item count)
     262        const Binding & input = mInputStreamSets[i];
     263        unsigned numOfPopCountArrays = 0;
     264        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
     265            ++numOfPopCountArrays;
     266        }
     267        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
     268            ++numOfPopCountArrays;
     269        }
     270        if (numOfPopCountArrays) {
     271            params.insert(params.end(), numOfPopCountArrays, sizePtrTy); // popCountRef array (length is numOfStrides)
     272        }
     273    }
     274    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
     275        const Binding & output = mOutputStreamSets[i];
     276        if (LLVM_LIKELY(!isLocalBuffer(output))) {
     277            Type * const bufferType = mStreamSetOutputBuffers[i]->getType();
     278            params.push_back(bufferType->getPointerTo()); // logical "base" output address
     279            params.push_back(sizeTy); // writable output items (after non-deferred produced item count)
     280        }
     281    }
     282
     283    FunctionType * const doSegmentType = FunctionType::get(voidTy, params, false);
     284    Function * const doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, b->getModule());
     285    doSegment->setCallingConv(CallingConv::C);
     286    doSegment->setDoesNotThrow();
          // Name the arguments in the same order in which the parameter types were pushed above.
     287    auto args = doSegment->arg_begin();
     288    args->setName("self");
     289    (++args)->setName("numOfStrides");
     290    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
     291        const Binding & input = mInputStreamSets[i];
     292        (++args)->setName(input.getName());
     293        (++args)->setName(input.getName() + "_accessible");
     294        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
     295            (++args)->setName(input.getName() + "_popCountArray");
     296        }
     297        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
     298            (++args)->setName(input.getName() + "_negatedPopCountArray");
     299        }
     300    }
     301    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
     302        const Binding & output = mOutputStreamSets[i];
     303        if (LLVM_LIKELY(!isLocalBuffer(output))) {
     304            (++args)->setName(output.getName());
     305            (++args)->setName(output.getName() + "_writable");
     306        }
     307    }
     308    assert (std::next(args) == doSegment->arg_end());
     309}
     310
     311/** ------------------------------------------------------------------------------------------------------------- *
     312 * @brief callGenerateKernelMethod
         *
         * Emits the body of the do-segment function. It binds the first argument as the handle and the second as
         * mNumOfStrides; mIsFinal is the test "numOfStrides is null (zero)". The remaining arguments are consumed in
         * declaration order:
         *   - each input gets a stack-allocated buffer handle whose base address is the passed pointer and whose
         *     capacity is (non-deferred processed count + accessible items), plus the lookahead amount if the binding
         *     has one; mAvailableInputItems[i] holds the sum without the lookahead;
         *   - each local output (see isLocalBuffer) uses the handle stored in the kernel state; every other output gets
         *     a stack-allocated handle with capacity (non-deferred produced count + writable items).
         * With EnableAsserts set, an assertion that the kernel has not already terminated is emitted before
         * generateKernelMethod is called. The builder's kernel, mHandle and the per-call members are reset at the end.
         * NOTE(review): the cleanup clears mAccessibleInputItems and both pop count arrays but not
         * mAvailableInputItems or mWritableOutputItems; confirm whether that is intentional.
     313 ** ------------------------------------------------------------------------------------------------------------- */
     314void Kernel::callGenerateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
     315
     316    assert (mInputStreamSets.size() == mStreamSetInputBuffers.size());
     317    assert (mOutputStreamSets.size() == mStreamSetOutputBuffers.size());
     318
     319    const Kernel * const storedKernel = b->getKernel();
     320    b->setKernel(this);
     321    Value * const storedHandle = getHandle();
     322    mCurrentMethod = getDoSegmentFunction(b->getModule());
     323    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
     324    auto args = mCurrentMethod->arg_begin();
     325    setHandle(b, &*(args++));
     326    mNumOfStrides = &*(args++);
     327    mIsFinal = b->CreateIsNull(mNumOfStrides);
     328    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     329        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
     330    }
     331    // NOTE: the disadvantage of passing the stream pointers as a parameter is that it becomes more difficult
     332    // to access a stream set from a LLVM function call. We could create a stream-set aware function creation
     333    // and call system here but that is not an ideal way of handling this.
     334
     335    // TODO: use a graph to depict relations between binding? It would be better to first move to a model
     336    // where inputs and outputs are contained in a single parameter vector.
     337
     338    const auto numOfInputs = getNumOfStreamInputs();
     339    mAccessibleInputItems.resize(numOfInputs, nullptr);
     340    mAvailableInputItems.resize(numOfInputs, nullptr);
     341    mPopCountRateArray.resize(numOfInputs, nullptr);
     342    mNegatedPopCountRateArray.resize(numOfInputs, nullptr);
     343    for (unsigned i = 0; i < numOfInputs; i++) {
     344        const Binding & input = mInputStreamSets[i];
     345        assert (args != mCurrentMethod->arg_end());
     346        Value * const addr = &*(args++);
     347        auto & buffer = mStreamSetInputBuffers[i];
     348        Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
     349        buffer->setHandle(b, localHandle);
     350        buffer->setBaseAddress(b.get(), addr);
     351        assert (args != mCurrentMethod->arg_end());
     352        Value * const accessible = &*(args++);
     353        mAccessibleInputItems[i] = accessible;
     354        Value * const processed = b->getNonDeferredProcessedItemCount(input);
     355        Value * capacity = b->CreateAdd(processed, accessible);
     356        mAvailableInputItems[i] = capacity;
     357        if (input.hasLookahead()) {
     358            capacity = b->CreateAdd(capacity, b->getSize(input.getLookahead()));
     359        }
     360        buffer->setCapacity(b.get(), capacity);
     361        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
     362            assert (args != mCurrentMethod->arg_end());
     363            mPopCountRateArray[i] = &*(args++);
     364        }
     365        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
     366            assert (args != mCurrentMethod->arg_end());
     367            mNegatedPopCountRateArray[i] = &*(args++);
     368        }
     369    }
     370
     371    // set all of the output buffers
     372    const auto numOfOutputs = getNumOfStreamOutputs();
     373    mWritableOutputItems.resize(numOfOutputs, nullptr);
     374    for (unsigned i = 0; i < numOfOutputs; i++) {
     375        // If an output is a managed buffer, the address is stored within the state instead
     376        // of being passed in through the function call.
     377        auto & buffer = mStreamSetOutputBuffers[i];
     378        const Binding & output = mOutputStreamSets[i];
     379        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
     380            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
     381            buffer->setHandle(b, handle);
    103382        } else {
    104             report_fatal_error(getName() + ": output stream set " + output.getName()
    105                                + " is already produced by kernel " + buf->getProducer()->getName());
    106         }
    107     }
    108 
    109     mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
    110     mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
     383            assert (args != mCurrentMethod->arg_end());
     384            Value * const logicalBaseAddress = &*(args++);
     385            Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
     386            buffer->setHandle(b, localHandle);
     387            buffer->setBaseAddress(b.get(), logicalBaseAddress);
     388            assert (args != mCurrentMethod->arg_end());
     389            Value * const writable = &*(args++);
     390            mWritableOutputItems[i] = writable;
     391            Value * const produced = b->getNonDeferredProducedItemCount(output);
     392            Value * const capacity = b->CreateAdd(produced, writable);
     393            buffer->setCapacity(b.get(), capacity);
     394        }
     395    }
     396    assert (args == mCurrentMethod->arg_end());
     397
     398    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     399        Value * const terminated = b->getTerminationSignal();
     400        b->CreateAssert(b->CreateNot(terminated), getName() + " was called after termination");
     401    }
     402
     403    // Calculate and/or load the accessible and writable item counts. If they are unneeded,
     404    // LLVM ought to recognize them as dead code and remove them.
     405    generateKernelMethod(b); // must be overridden by the Kernel subtype   
     406    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     407        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
     408    }
     409    b->CreateRetVoid();
     410
     411    // Clean up all of the constructed buffers.
     412    b->setKernel(storedKernel);
     413    mHandle = storedHandle;
     414    mCurrentMethod = nullptr;
     415    mIsFinal = nullptr;
     416    mNumOfStrides = nullptr;
     417    mAccessibleInputItems.clear();
     418    mPopCountRateArray.clear();
     419    mNegatedPopCountRateArray.clear();
     420}
     421
     422/** ------------------------------------------------------------------------------------------------------------- *
     423 * @brief addFinalizeDeclaration
         *
         * Declares the function named getName() + "_Terminate" in the builder's module. It takes only the kernel state
         * pointer ("self") and has external linkage, the C calling convention and is marked non-throwing. The return
         * type is void when there are no output scalars, the scalar's own type when there is exactly one, and otherwise
         * a struct of all output scalar types in declaration order.
     424 ** ------------------------------------------------------------------------------------------------------------- */
     425void Kernel::addFinalizeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
     426    Type * resultType = nullptr;
     427    if (mOutputScalars.empty()) {
     428        resultType = b->getVoidTy();
     429    } else {
     430        const auto n = mOutputScalars.size();
              // NOTE(review): n is only known at run time, so this is a variable-length array; that is a compiler
              // extension rather than standard C++.
     431        Type * outputType[n];
     432        for (unsigned i = 0; i < n; ++i) {
     433            outputType[i] = mOutputScalars[i].getType();
     434        }
     435        if (n == 1) {
     436            resultType = outputType[0];
     437        } else {
     438            resultType = StructType::get(b->getContext(), ArrayRef<Type *>(outputType, n));
     439        }
     440    }
     441    PointerType * const selfType = mKernelStateType->getPointerTo();
     442    FunctionType * const terminateType = FunctionType::get(resultType, {selfType}, false);
     443    Function * const terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, b->getModule());
     444    terminateFunc->setCallingConv(CallingConv::C);
     445    terminateFunc->setDoesNotThrow();
     446    auto args = terminateFunc->arg_begin();
     447    args->setName("self");
     448    assert (std::next(args) == terminateFunc->arg_end());
     449}
     450
     451/** ------------------------------------------------------------------------------------------------------------- *
     452 * @brief callGenerateFinalizeMethod
         *
         * Emits the body of the terminate function: binds the argument as the handle, points every local output buffer
         * (see isLocalBuffer) at its handle field in the state, calls generateFinalizeMethod, reads the output scalars,
         * frees the kernel state and returns the scalars (void for none, the value itself for one, an aggregate
         * otherwise). The output scalars are read before the state is freed.
         * NOTE(review): mHandle is set to nullptr here, whereas the init and do-segment emitters restore the handle
         * that was current on entry; confirm that callers do not rely on the previous handle afterwards.
     453 ** ------------------------------------------------------------------------------------------------------------- */
     454void Kernel::callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
     455
     456    const Kernel * const storedKernel = b->getKernel();
     457    b->setKernel(this);
     458    mCurrentMethod = getTerminateFunction(b->getModule());
     459    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
     460    auto args = mCurrentMethod->arg_begin();
     461    setHandle(b, &*(args++));
     462    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     463        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
     464    }
     465    const auto numOfOutputs = mOutputStreamSets.size();
     466    for (unsigned i = 0; i < numOfOutputs; i++) {
     467        const Binding & output = mOutputStreamSets[i];
     468        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
     469            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
     470            mStreamSetOutputBuffers[i]->setHandle(b, handle);
     471        }
     472    }
     473
     474    generateFinalizeMethod(b); // may be overridden by the Kernel subtype
     475    const auto outputs = getFinalOutputScalars(b);
     476    b->CreateFree(mHandle);
     477    mHandle = nullptr;
     478
     479    if (outputs.empty()) {
     480        b->CreateRetVoid();
     481    } else {
     482        const auto n = outputs.size();
     483        if (n == 1) {
     484            b->CreateRet(outputs[0]);
     485        } else {
     486            b->CreateAggregateRet(outputs.data(), n);
     487        }
     488    }
     489
     490    b->setKernel(storedKernel);
     491    mCurrentMethod = nullptr;
     492}
     493
     494/** ------------------------------------------------------------------------------------------------------------- *
     495 * @brief getFinalOutputScalars
         *
         * Returns one value per output scalar binding, in declaration order, each obtained from the builder via
         * getScalarField(<binding name>). The result is empty when the kernel has no output scalars.
     496 ** ------------------------------------------------------------------------------------------------------------- */
     497std::vector<Value *> Kernel::getFinalOutputScalars(const std::unique_ptr<KernelBuilder> & b) {
     498    const auto n = mOutputScalars.size();
     499    std::vector<Value *> outputs(n);
     500    for (unsigned i = 0; i < n; ++i) {
     501        outputs[i] = b->getScalarField(mOutputScalars[i].getName());
     502    }
     503    return outputs;
    111504}
    112505
     
    117510    std::stringstream cacheName;
    118511    cacheName << getName() << '_' << b->getBuilderUniqueName();
    119     for (const StreamSetBuffer * b: mStreamSetInputBuffers) {
    120         cacheName <<  ':' <<  b->getUniqueID();
    121     }
    122     for (const StreamSetBuffer * b: mStreamSetOutputBuffers) {
    123         cacheName <<  ':' <<  b->getUniqueID();
    124     }
    125512    return cacheName.str();
    126513}
    127 
    128514
    129515/** ------------------------------------------------------------------------------------------------------------- *
     
    140526 * @brief makeModule
    141527 ** ------------------------------------------------------------------------------------------------------------- */
    142 Module * Kernel::makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb) {
    143     Module * m = new Module(getCacheName(idb), idb->getContext());
    144     m->setTargetTriple(idb->getModule()->getTargetTriple());
    145     m->setDataLayout(idb->getModule()->getDataLayout());
     528Module * Kernel::makeModule(const std::unique_ptr<KernelBuilder> & b) {
     529    Module * m = new Module(getCacheName(b), b->getContext());
     530    m->setTargetTriple(b->getModule()->getTargetTriple());
     531    m->setDataLayout(b->getModule()->getDataLayout());
    146532    return setModule(m);
    147533}
     
    149535
    150536/** ------------------------------------------------------------------------------------------------------------- *
     537 * @brief getInitFunction
     538 ** ------------------------------------------------------------------------------------------------------------- */
     539Function * Kernel::getInitFunction(Module * const module) const {
     540    const auto name = getName() + INIT_SUFFIX;
     541    Function * f = module->getFunction(name);
     542    if (LLVM_UNLIKELY(f == nullptr)) {
     543        report_fatal_error("Cannot find " + name);
     544    }
     545    return f;
     546}
     547
     548/** ------------------------------------------------------------------------------------------------------------- *
     549 * @brief getDoSegmentFunction
     550 ** ------------------------------------------------------------------------------------------------------------- */
     551Function * Kernel::getDoSegmentFunction(Module * const module) const {
     552    const auto name = getName() + DO_SEGMENT_SUFFIX;
     553    Function * f = module->getFunction(name);
     554    if (LLVM_UNLIKELY(f == nullptr)) {
     555        report_fatal_error("Cannot find " + name);
     556    }
     557    return f;
     558}
     559
     560/** ------------------------------------------------------------------------------------------------------------- *
     561 * @brief getTerminateFunction
     562 ** ------------------------------------------------------------------------------------------------------------- */
     563Function * Kernel::getTerminateFunction(Module * const module) const {
     564    const auto name = getName() + TERMINATE_SUFFIX;
     565    Function * f = module->getFunction(name);
     566    if (LLVM_UNLIKELY(f == nullptr)) {
     567        report_fatal_error("Cannot find " + name);
     568    }
     569    return f;
     570}
     571
     572/** ------------------------------------------------------------------------------------------------------------- *
    151573 * @brief prepareKernel
    152574 ** ------------------------------------------------------------------------------------------------------------- */
    153575void Kernel::prepareKernel(const std::unique_ptr<KernelBuilder> & b) {
    154     assert ("KernelBuilder does not have a valid IDISA Builder" && b);
    155576    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    156577        report_fatal_error(getName() + ": cannot prepare kernel after kernel state finalized");
    157578    }
    158     // verifyStreamSetDefinitions();
    159579    addBaseKernelProperties(b);
    160580    addInternalKernelProperties(b);
     
    164584    }
    165585    mKernelStateType = mModule->getTypeByName(getName());
     586
     587
    166588    if (LLVM_LIKELY(mKernelStateType == nullptr)) {
    167         mKernelStateType = StructType::create(b->getContext(), mKernelFields, getName());
    168         assert (mKernelStateType);
    169     }
    170 }
    171 
     589        std::vector<llvm::Type *> fields;
     590        fields.reserve(mInputScalars.size() + mOutputScalars.size() + mInternalScalars.size());
     591        for (const Binding & scalar : mInputScalars) {
     592            assert (scalar.getType());
     593            fields.push_back(scalar.getType());
     594        }
     595        for (const Binding & scalar : mOutputScalars) {
     596            assert (scalar.getType());
     597            fields.push_back(scalar.getType());
     598        }
     599        for (const Binding & scalar : mInternalScalars) {
     600            assert (scalar.getType());
     601            fields.push_back(scalar.getType());
     602        }
     603        mKernelStateType = StructType::create(b->getContext(), fields, getName());       
     604    }
     605
     606
     607
     608
     609    assert (isa<StructType>(mKernelStateType));
     610}
     611
     612/** ------------------------------------------------------------------------------------------------------------- *
     613 * @brief addInternalScalar
     614 ** ------------------------------------------------------------------------------------------------------------- */
     615void Kernel::addInternalScalar(llvm::Type * type, const std::string & name) {
     616    const auto index = mInternalScalars.size();
     617    mInternalScalars.emplace_back(type, name);
     618    addScalarToMap(name, ScalarType::Internal, index);
     619}
     620
     621/** ------------------------------------------------------------------------------------------------------------- *
     622 * @brief getScalarIndex
     623 ** ------------------------------------------------------------------------------------------------------------- */
     624unsigned Kernel::getScalarIndex(const std::string & fieldName) const {
     625    const auto & field = getScalarField(fieldName);
     626    assert (mKernelStateType);
     627    unsigned index = field.index;
     628    switch (field.type) {
     629        case ScalarType::Internal:
     630            index += mOutputScalars.size();
     631        case ScalarType::Output:
     632            index += mInputScalars.size();
     633        case ScalarType::Input:
     634            break;
     635    }
     636    assert (index < mKernelStateType->getStructNumElements());
     637    return index;
     638}
    172639
    173640/** ------------------------------------------------------------------------------------------------------------- *
     
    175642 ** ------------------------------------------------------------------------------------------------------------- */
    176643void Kernel::prepareCachedKernel(const std::unique_ptr<KernelBuilder> & b) {
    177     assert ("KernelBuilder does not have a valid IDISA Builder" && b);
    178644    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    179645        report_fatal_error(getName() + ": cannot prepare kernel after kernel state finalized");
    180     }
    181     assert (getModule());   
     646    }
    182647    addBaseKernelProperties(b);
    183648    mKernelStateType = getModule()->getTypeByName(getName());
    184649    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    185         report_fatal_error("Kernel definition for " + getName() + " could not be found in the cache object");
    186     }
    187 }
    188 
    189 /** ------------------------------------------------------------------------------------------------------------- *
    190  * @brief containsFixedRate
    191  ** ------------------------------------------------------------------------------------------------------------- */
    192 bool containsFixedRate(const Bindings & bindings) {
    193     for (const Binding & binding : bindings) {
    194         const ProcessingRate & rate = binding.getRate();
    195         if (rate.isFixed()) {
    196             return true;
    197         }
    198     }
    199     return false;
    200 }
    201 
    202 /** ------------------------------------------------------------------------------------------------------------- *
    203  * @brief addBaseKernelProperties
    204  ** ------------------------------------------------------------------------------------------------------------- */
    205 void Kernel::addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & b) {
    206 
    207     const unsigned inputSetCount = mStreamSetInputs.size();
    208     const unsigned outputSetCount = mStreamSetOutputs.size();
    209 
    210     assert (inputSetCount == mStreamSetInputBuffers.size());
    211     assert (outputSetCount == mStreamSetOutputBuffers.size());
    212 
    213     if (mStride == 0) {
    214         // Set the default kernel stride.
    215         mStride = b->getBitBlockWidth();
    216     }
    217 
    218     IntegerType * const sizeTy = b->getSizeTy();
    219 
    220     addScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
    221     addScalar(sizeTy, TERMINATION_SIGNAL);
    222     // TODO: if we had a way of easily calculating the number of processed/produced items of the
    223     // final stride of a non-deferred fixed rate stream, we could avoid storing the item counts.
    224     for (unsigned i = 0; i < inputSetCount; i++) {
    225         const Binding & input = mStreamSetInputs[i];
    226         addScalar(sizeTy, input.getName() + PROCESSED_ITEM_COUNT_SUFFIX);
    227         if (LLVM_UNLIKELY(input.isDeferred())) {
    228             addScalar(sizeTy, input.getName() + NON_DEFERRED_ITEM_COUNT_SUFFIX);
    229         }
    230     }
    231     for (unsigned i = 0; i < outputSetCount; i++) {
    232         const Binding & output = mStreamSetOutputs[i];
    233         addScalar(sizeTy, output.getName() + PRODUCED_ITEM_COUNT_SUFFIX);
    234         if (LLVM_UNLIKELY(output.isDeferred())) {
    235             addScalar(sizeTy, output.getName() + NON_DEFERRED_ITEM_COUNT_SUFFIX);
    236         }
    237     }
    238     for (unsigned i = 0; i < inputSetCount; i++) {
    239         mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getStreamSetHandle()->getType(), mStreamSetInputs[i].getName() + BUFFER_SUFFIX);
    240     }
    241     for (unsigned i = 0; i < outputSetCount; i++) {
    242         mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getStreamSetHandle()->getType(), mStreamSetOutputs[i].getName() + BUFFER_SUFFIX);
    243     }
    244     for (const auto & binding : mScalarInputs) {
    245         addScalar(binding.getType(), binding.getName());
    246     }
    247     for (const auto & binding : mScalarOutputs) {
    248         addScalar(binding.getType(), binding.getName());
    249     }
    250     for (const auto & binding : mInternalScalars) {
    251         addScalar(binding.getType(), binding.getName());
    252     }
    253     Type * const consumerSetTy = StructType::get(b->getContext(), {sizeTy, sizeTy->getPointerTo()->getPointerTo()})->getPointerTo();
    254     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    255         addScalar(consumerSetTy, mStreamSetOutputs[i].getName() + CONSUMER_SUFFIX);
    256     }
    257     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    258         addScalar(sizeTy, mStreamSetOutputs[i].getName() + CONSUMED_ITEM_COUNT_SUFFIX);
    259     }
    260     // We compile in a 64-bit CPU cycle counter into every kernel.   It will remain unused
    261     // in normal execution, but when codegen::EnableCycleCounter is specified, pipelines
    262     // will be able to add instrumentation to cached modules without recompilation.
    263     addScalar(b->getInt64Ty(), CYCLECOUNT_SCALAR);
    264 }
    265 
     650        report_fatal_error("Kernel definition for " + getName() + " was not found in the cache!");
     651    }
     652    assert (isa<StructType>(mKernelStateType));
     653}
    266654
    267655/** ------------------------------------------------------------------------------------------------------------- *
     
    270658 * Default kernel signature: generate the IR and emit as byte code.
    271659 ** ------------------------------------------------------------------------------------------------------------- */
    272 std::string Kernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> & idb) {
    273     assert ("KernelBuilder does not have a valid IDISA Builder" && idb.get());
     660std::string Kernel::makeSignature(const std::unique_ptr<KernelBuilder> & b) {
    274661    if (LLVM_UNLIKELY(hasSignature())) {
    275         generateKernel(idb);
     662        generateKernel(b);
    276663        std::string tmp;
    277664        raw_string_ostream signature(tmp);
     
    283670}
    284671
     672/** ------------------------------------------------------------------------------------------------------------- *
     673 * @brief getStringHash
     674 *
     675 * Create a fixed length string hash of the given str
     676 ** ------------------------------------------------------------------------------------------------------------- */
     677std::string Kernel::getStringHash(const std::string & str) {
     678
     679    uint32_t digest[5]; // 160 bits in total
     680    boost::uuids::detail::sha1 sha1;
     681    sha1.process_bytes(str.c_str(), str.size());
     682    sha1.get_digest(digest);
     683
     684    std::string buffer;
     685    buffer.reserve((5 * 8) + 1);
     686    raw_string_ostream out(buffer);
     687    for (unsigned i = 0; i < 5; ++i) {
     688        out << format_hex_no_prefix(digest[i], 8);
     689    }
     690    out.flush();
     691
     692    return buffer;
     693}
     694
     695/** ------------------------------------------------------------------------------------------------------------- *
     696 * @brief createInstance
     697 ** ------------------------------------------------------------------------------------------------------------- */
     698Value * Kernel::createInstance(const std::unique_ptr<KernelBuilder> & b) {
     699    assert (mKernelStateType && "cannot create instance before calling prepareKernel() or prepareCachedKernel()");
     700    Constant * const size = ConstantExpr::getSizeOf(mKernelStateType);
     701    Value * handle = nullptr;
     702    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     703        handle = b->CreateAlignedMalloc(size, b->getPageSize());
     704        b->CreateMProtect(handle, size, CBuilder::Protect::READ);
     705    } else {
     706        handle = b->CreateAlignedMalloc(size, b->getCacheAlignment());
     707    }
     708//    mHandle = b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
     709//    return mHandle;
     710    return b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
     711}
     712
     713/** ------------------------------------------------------------------------------------------------------------- *
     714 * @brief initializeInstance
     715 ** ------------------------------------------------------------------------------------------------------------- */
     716void Kernel::initializeInstance(const std::unique_ptr<KernelBuilder> & b, std::vector<Value *> &args) {
     717    assert (args.size() == getNumOfScalarInputs() + 1);
     718    assert (args[0] && "cannot initialize before creation");
     719    assert (args[0]->getType()->getPointerElementType() == mKernelStateType);
     720    b->setKernel(this);
     721    Function * const init = getInitFunction(b->getModule());
     722    b->CreateCall(init, args);
     723}
    285724
    286725/** ------------------------------------------------------------------------------------------------------------- *
    287726 * @brief generateKernel
    288727 ** ------------------------------------------------------------------------------------------------------------- */
    289 void Kernel::generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb) {
    290     assert ("Kernel does not have a valid IDISA Builder" && idb.get());
     728void Kernel::generateKernel(const std::unique_ptr<KernelBuilder> & b) {
    291729    if (LLVM_UNLIKELY(mIsGenerated)) return;
    292     idb->setModule(mModule);
    293     addKernelDeclarations(idb);
    294     callGenerateInitializeMethod(idb);
    295     callGenerateDoSegmentMethod(idb);
    296     callGenerateFinalizeMethod(idb);
     730    b->setKernel(this);
     731    b->setModule(mModule);
     732    addKernelDeclarations(b);
     733    callGenerateInitializeMethod(b);
     734    callGenerateKernelMethod(b);
     735    callGenerateFinalizeMethod(b);
     736    addAdditionalFunctions(b);
    297737    mIsGenerated = true;
    298738}
    299739
    300 
    301 /** ------------------------------------------------------------------------------------------------------------- *
    302  * @brief callGenerateInitializeMethod
    303  ** ------------------------------------------------------------------------------------------------------------- */
    304 inline void Kernel::callGenerateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & b) {
    305     mCurrentMethod = getInitFunction(b->getModule());
    306     b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
    307     Function::arg_iterator args = mCurrentMethod->arg_begin();
    308     setInstance(&*(args++));
    309     b->CreateStore(ConstantAggregateZero::get(mKernelStateType), getInstance());
    310     for (const auto & binding : mScalarInputs) {
    311         b->setScalarField(binding.getName(), &*(args++));
    312     }
    313     for (const auto & binding : mStreamSetOutputs) {
    314         b->setConsumerLock(binding.getName(), &*(args++));
    315     }
    316     generateInitializeMethod(b);
    317     b->CreateRetVoid();
    318 }
    319 
    320 /** ------------------------------------------------------------------------------------------------------------- *
    321  * @brief callGenerateDoSegmentMethod
    322  ** ------------------------------------------------------------------------------------------------------------- */
    323 inline void Kernel::callGenerateDoSegmentMethod(const std::unique_ptr<kernel::KernelBuilder> & b) {
    324     mCurrentMethod = getDoSegmentFunction(b->getModule());
    325     b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
    326     auto args = mCurrentMethod->arg_begin();
    327     setInstance(&*(args++));
    328     mIsFinal = &*(args++);
    329     const auto n = mStreamSetInputs.size();
    330     mAvailableItemCount.resize(n, nullptr);
    331     for (unsigned i = 0; i < n; i++) {
    332         assert (args != mCurrentMethod->arg_end());
    333         mAvailableItemCount[i] = &*(args++);
    334     }
    335     assert (args == mCurrentMethod->arg_end());
    336     generateKernelMethod(b); // must be overridden by the Kernel subtype
    337     mIsFinal = nullptr;
    338     mAvailableItemCount.clear();
    339     b->CreateRetVoid();
    340 }
    341 
    342 
    343 /** ------------------------------------------------------------------------------------------------------------- *
    344  * @brief callGenerateFinalizeMethod
    345  ** ------------------------------------------------------------------------------------------------------------- */
    346 inline void Kernel::callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
    347     mCurrentMethod = getTerminateFunction(b->getModule());
    348     b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
    349     auto args = mCurrentMethod->arg_begin();
    350     setInstance(&*(args++));
    351     generateFinalizeMethod(b); // may be overridden by the Kernel subtype
    352     const auto n = mScalarOutputs.size();
    353     if (n == 0) {
    354         b->CreateRetVoid();
    355     } else {
    356         Value * outputs[n];
    357         for (unsigned i = 0; i < n; ++i) {
    358             outputs[i] = b->getScalarField(mScalarOutputs[i].getName());
    359         }
    360         if (n == 1) {
    361             b->CreateRet(outputs[0]);
    362         } else {
    363             b->CreateAggregateRet(outputs, n);
    364         }
    365     }
    366 }
    367 
    368 
    369 /** ------------------------------------------------------------------------------------------------------------- *
    370  * @brief getScalarIndex
    371  ** ------------------------------------------------------------------------------------------------------------- */
    372 unsigned Kernel::getScalarIndex(const std::string & name) const {
    373     const auto f = mKernelFieldMap.find(name);
    374     if (LLVM_UNLIKELY(f == mKernelFieldMap.end())) {
    375         assert ("kernel does not contain the requested scalar" && false);
     740/** ------------------------------------------------------------------------------------------------------------- *
     741 * @brief finalizeInstance
     742 ** ------------------------------------------------------------------------------------------------------------- */
     743Value * Kernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & b) {
     744    assert (mHandle && "was not set");
     745    Value * result = b->CreateCall(getTerminateFunction(b->getModule()), { mHandle });
     746    mHandle = nullptr;
     747    if (mOutputScalars.empty()) {
     748        assert (!result || result->getType()->isVoidTy());
     749        result = nullptr;
     750    }
     751    return result;
     752
     753}
     754
     755/** ------------------------------------------------------------------------------------------------------------- *
     756 * @brief getScalarField
     757 ** ------------------------------------------------------------------------------------------------------------- */
     758const Kernel::ScalarField & Kernel::getScalarField(const std::string & name) const {
     759    assert (!mScalarMap.empty());
     760    const auto f = mScalarMap.find(name);
     761    if (LLVM_UNLIKELY(f == mScalarMap.end())) {
    376762        report_fatal_error(getName() + " does not contain scalar: " + name);
    377763    }
     
    379765}
    380766
    381 
    382 /** ------------------------------------------------------------------------------------------------------------- *
    383  * @brief createInstance
    384  ** ------------------------------------------------------------------------------------------------------------- */
    385 Value * Kernel::createInstance(const std::unique_ptr<KernelBuilder> & idb) {
    386     assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
    387     if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    388         report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
    389     }
    390     setInstance(idb->CreateCacheAlignedAlloca(mKernelStateType));
    391     return getInstance();
    392 }
    393 
    394 
    395 /** ------------------------------------------------------------------------------------------------------------- *
    396  * @brief initializeInstance
    397  ** ------------------------------------------------------------------------------------------------------------- */
    398 void Kernel::initializeInstance(const std::unique_ptr<KernelBuilder> & b) {
    399     assert ("KernelBuilder does not have a valid IDISA Builder" && b);
    400     if (LLVM_UNLIKELY(getInstance() == nullptr)) {
    401         report_fatal_error("Cannot initialize " + getName() + " before calling createInstance()");
    402     }
    403     std::vector<Value *> args;
    404     args.reserve(1 + mInitialArguments.size() + mStreamSetInputBuffers.size() + (mStreamSetOutputBuffers.size() * 2));
    405     args.push_back(getInstance());
    406     for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
    407         Value * arg = mInitialArguments[i];
    408         if (LLVM_UNLIKELY(arg == nullptr)) {
    409             report_fatal_error(getName() + ": initial argument " + std::to_string(i)
    410                                + " cannot be null when calling createInstance()");
    411         }
    412         args.push_back(arg);
    413     }
    414     for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
    415         assert (mStreamSetInputBuffers[i]);
    416         Value * arg = mStreamSetInputBuffers[i]->getStreamSetHandle();
    417         if (LLVM_UNLIKELY(arg == nullptr)) {
    418             report_fatal_error(getName() + ": input stream set " + std::to_string(i)
    419                                + " was not allocated prior to calling createInstance()");
    420         }
    421         args.push_back(arg);
    422     }
    423     assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
    424     for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
    425         assert (mStreamSetOutputBuffers[i]);
    426         Value * arg = mStreamSetOutputBuffers[i]->getStreamSetHandle();
    427         if (LLVM_UNLIKELY(arg == nullptr)) {
    428             report_fatal_error(getName() + ": output stream set " + std::to_string(i)
    429                                + " was not allocated prior to calling createInstance()");
    430         }
    431         args.push_back(arg);
    432     }
    433     assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
    434     IntegerType * const sizeTy = b->getSizeTy();
    435     PointerType * const sizePtrTy = sizeTy->getPointerTo();
    436     PointerType * const sizePtrPtrTy = sizePtrTy->getPointerTo();
    437     StructType * const consumerTy = StructType::get(b->getContext(), {sizeTy, sizePtrPtrTy});
    438     for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
    439         const auto output = mStreamSetOutputBuffers[i];
    440         const auto & consumers = output->getConsumers();
    441         const auto n = consumers.size();
    442         AllocaInst * const outputConsumers = b->CreateAlloca(consumerTy);
    443         Value * const consumerSegNoArray = b->CreateAlloca(ArrayType::get(sizePtrTy, n));
    444         for (unsigned i = 0; i < n; ++i) {
    445             Kernel * const consumer = consumers[i];
    446             assert ("all instances must be created prior to initialization of any instance" && consumer->getInstance());
    447             b->setKernel(consumer);
    448             Value * const segmentNoPtr = b->getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR);
    449             b->CreateStore(segmentNoPtr, b->CreateGEP(consumerSegNoArray, { b->getInt32(0), b->getInt32(i) }));
    450         }
    451         b->setKernel(this);
    452         Value * const consumerCountPtr = b->CreateGEP(outputConsumers, {b->getInt32(0), b->getInt32(0)});
    453         b->CreateStore(b->getSize(n), consumerCountPtr);
    454         Value * const consumerSegNoArrayPtr = b->CreateGEP(outputConsumers, {b->getInt32(0), b->getInt32(1)});
    455         b->CreateStore(b->CreatePointerCast(consumerSegNoArray, sizePtrPtrTy), consumerSegNoArrayPtr);
    456         args.push_back(outputConsumers);
    457     }
    458     b->CreateCall(getInitFunction(b->getModule()), args);
    459 }
    460 
    461 /** ------------------------------------------------------------------------------------------------------------- *
    462  * @brief finalizeInstance
    463  ** ------------------------------------------------------------------------------------------------------------- */
    464 void Kernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & idb) {
    465     assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
    466     mOutputScalarResult = idb->CreateCall(getTerminateFunction(idb->getModule()), { getInstance() });
     767/** ------------------------------------------------------------------------------------------------------------- *
     768 * @brief getInputScalarBinding
     769 ** ------------------------------------------------------------------------------------------------------------- */
     770Binding & Kernel::getInputScalarBinding(const std::string & name) {
     771    const ScalarField & field = getScalarField(name);
     772    if (LLVM_UNLIKELY(field.type != ScalarType::Input)) {
     773        report_fatal_error(getName() + "." + name + "is not an input scalar");
     774    }
     775    return mInputScalars[field.index];
     776}
     777
     778/** ------------------------------------------------------------------------------------------------------------- *
     779 * @brief getOutputScalarBinding
     780 ** ------------------------------------------------------------------------------------------------------------- */
     781Binding & Kernel::getOutputScalarBinding(const std::string & name) {
     782    const ScalarField & field = getScalarField(name);
     783    if (LLVM_UNLIKELY(field.type != ScalarType::Output)) {
     784        report_fatal_error(getName() + "." + name + "is not an output scalar");
     785    }
     786    return mOutputScalars[field.index];
    467787}
    468788
     
    470790 * @brief getStreamPort
    471791 ** ------------------------------------------------------------------------------------------------------------- */
    472 Kernel::StreamPort Kernel::getStreamPort(const std::string & name) const {
    473     const auto f = mStreamMap.find(name);
    474     if (LLVM_UNLIKELY(f == mStreamMap.end())) {
    475         assert (!mStreamMap.empty());
     792Kernel::StreamSetPort Kernel::getStreamPort(const std::string & name) const {
     793    const auto f = mStreamSetMap.find(name);
     794    if (LLVM_UNLIKELY(f == mStreamSetMap.end())) {
     795        assert (!mStreamSetMap.empty());
    476796        report_fatal_error(getName() + " does not contain stream set " + name);
    477797    }
     
    480800
    481801/** ------------------------------------------------------------------------------------------------------------- *
    482  * @brief getStreamPort
    483  ** ------------------------------------------------------------------------------------------------------------- */
    484 const Binding & Kernel::getBinding(const std::string & name) const {
 * @brief getStreamBinding
     803 ** ------------------------------------------------------------------------------------------------------------- */
     804const Binding & Kernel::getStreamBinding(const std::string & name) const {
    485805    Port port; unsigned index;
    486806    std::tie(port, index) = getStreamPort(name);
    487     return (port == Port::Input) ? getStreamInput(index) : getStreamOutput(index);
     807    return (port == Port::Input) ? getInputStreamSetBinding(index) : getOutputStreamSetBinding(index);
    488808}
    489809
     
    491811 * @brief getLowerBound
    492812 ** ------------------------------------------------------------------------------------------------------------- */
    493 ProcessingRate::RateValue Kernel::getLowerBound(const ProcessingRate & rate) const {
     // Returns the lower bound of the processing rate attached to `binding`.
     // When the rate has a reference, the rate's own lower bound is multiplied by the
     // lower bound computed (recursively) for the referenced stream binding, which is
     // looked up by name through getStreamBinding(). Otherwise the rate's own lower
     // bound is returned unchanged.
     813RateValue Kernel::getLowerBound(const Binding & binding) const {
     814    const ProcessingRate & rate = binding.getRate();
     815    if (rate.hasReference()) {
     816        return rate.getLowerBound() * getLowerBound(getStreamBinding(rate.getReference()));
     817    } else {
     818        return rate.getLowerBound();
     819    }
     820}
     821
     822/** ------------------------------------------------------------------------------------------------------------- *
     823 * @brief getUpperBound
     824 ** ------------------------------------------------------------------------------------------------------------- */
     // Returns the upper bound of the processing rate attached to `binding`.
     // When the rate has a reference, the rate's own upper bound is multiplied by the
     // upper bound computed (recursively) for the referenced stream binding, which is
     // looked up by name through getStreamBinding(). Otherwise the rate's own upper
     // bound is returned unchanged.
     825RateValue Kernel::getUpperBound(const Binding & binding) const {
     826    const ProcessingRate & rate = binding.getRate();
     827    if (rate.hasReference()) {
     828        return rate.getUpperBound() * getUpperBound(getStreamBinding(rate.getReference()));
     829    } else {
     830        return rate.getUpperBound();
     831    }
     832}
     833
     834/** ------------------------------------------------------------------------------------------------------------- *
     835 * @brief isCountable
     836 ** ------------------------------------------------------------------------------------------------------------- */
     // Returns true only when the binding's rate is Fixed, PopCount or NegatedPopCount;
     // every other rate yields false.
     // The branch that would forward a Relative rate to its referenced binding is
     // commented out below, so Relative rates currently fall through to `false`.
     837bool Kernel::isCountable(const Binding & binding) const {
     838    const ProcessingRate & rate = binding.getRate();
     839    if (rate.isFixed() || rate.isPopCount() || rate.isNegatedPopCount()) {
     840        return true;
     841//    } else if (rate.isRelative()) {
     842//        return isCountable(getStreamBinding(rate.getReference()));
     843    } else {
     844        return false;
     845    }
     846}
     847
     848/** ------------------------------------------------------------------------------------------------------------- *
     849 * @brief isCalculable
     850 ** ------------------------------------------------------------------------------------------------------------- */
     851bool Kernel::isCalculable(const Binding & binding) const {
     852    const ProcessingRate & rate = binding.getRate();
    494853    if (rate.isFixed() || rate.isBounded()) {
    495         return rate.getLowerBound();
    496     } else if (rate.hasReference()) {
    497         return rate.getLowerBound() * getLowerBound(getBinding(rate.getReference()).getRate());
    498     } else { // if (rate.isUnknown())
    499         return 0;
    500     }
    501 }
    502 
    503 /** ------------------------------------------------------------------------------------------------------------- *
    504  * @brief getUpperBound
    505  ** ------------------------------------------------------------------------------------------------------------- */
    506 ProcessingRate::RateValue Kernel::getUpperBound(const ProcessingRate &rate) const {
    507     if (rate.isFixed() || rate.isBounded() || rate.isPopCount()) {
    508         return rate.getUpperBound();
    509     } else if (rate.hasReference()) {
    510         return rate.getUpperBound() * getUpperBound(getBinding(rate.getReference()).getRate());
    511     } else { // if (rate.isUnknown())
    512         return 0;
    513     }
    514 }
    515 
    516 /** ------------------------------------------------------------------------------------------------------------- *
    517  * @brief verifyStreamSetDefinitions
    518  ** ------------------------------------------------------------------------------------------------------------- */
    519 void Kernel::verifyStreamSetDefinitions() const {
    520     unsigned numOfPrincipalStreams = 0;
    521     for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    522         const Binding & input = mStreamSetInputs[i];
    523         const ProcessingRate & rate = input.getRate();
    524         // If a stream can be relative to a relative or fixed rate stream, it complicates the pipeline and
    525         // multiblock kernel. For now, report an error.
    526         if (LLVM_UNLIKELY(rate.hasReference())) {
    527             Port port; unsigned index;
    528             std::tie(port, index) = getStreamPort(rate.getReference());
    529             if (LLVM_UNLIKELY(port == Port::Output)) {
    530                 report_fatal_error(getName() + ": input stream \"" + input.getName() + "\" cannot refer to an output stream");
    531             }
    532             if (LLVM_UNLIKELY(index >= i)) {
    533                 report_fatal_error(getName() + ": \"" + input.getName() + "\" must be ordered after its reference stream");
    534             }
    535             if (rate.isRelative()) {
    536                 const ProcessingRate & refRate = getStreamInput(index).getRate();
    537                 if (LLVM_UNLIKELY(refRate.isRelative() || refRate.isFixed())) {
    538                     report_fatal_error(getName() + ": \"" + input.getName() + "\" cannot be relative to a fixed or relative rate stream");
    539                 }
    540             }
    541         } else if (LLVM_UNLIKELY(rate.isUnknown())) {
    542             report_fatal_error(getName() + ": \"" + input.getName() + "\" cannot be an unknown rate");
    543         }
    544         if (LLVM_UNLIKELY(input.isPrincipal())) {
    545             ++numOfPrincipalStreams;
    546         }
    547         bool hasFixedOnlyAttribute = false;
    548         for (const Attribute & attr : input.getAttributes()) {
    549             switch (attr.getKind()) {
    550                 case Attribute::KindId::Add:
    551                 case Attribute::KindId::RoundUpTo:
    552                 case Attribute::KindId::Deferred:
    553                     hasFixedOnlyAttribute = false;
    554                     break;
    555                 default: break;
    556             }
    557         }
    558         if (rate.isFixed()) {
    559 
    560 
    561 
    562         } else if (LLVM_UNLIKELY(hasFixedOnlyAttribute)) {
    563             report_fatal_error(getName() + ": Add, RoundUpTo and Deferred cannot be applied to non-Fixed rate input stream \"" + input.getName() + "\"");
    564         }
    565     }
    566     if (LLVM_UNLIKELY(numOfPrincipalStreams > 1)) {
    567         report_fatal_error(getName() + ": may only have one principal stream set");
    568     }
    569     for (unsigned i = 0; i < mStreamSetOutputs.size(); ++i) {
    570         const Binding & output = mStreamSetOutputs[i];
    571         const ProcessingRate & rate = output.getRate();
    572         if (LLVM_UNLIKELY(rate.hasReference())) {
    573             Port port; unsigned index;
    574             std::tie(port, index) = getStreamPort(rate.getReference());
    575             if (LLVM_UNLIKELY(rate.isPopCount() && port == Port::Output)) {
    576                 report_fatal_error(getName() + ": the popcount rate of \"" + output.getName() + "\" cannot refer to another output stream");
    577             }
    578             if (LLVM_UNLIKELY(port == Port::Output && index >= i)) {
    579                 report_fatal_error(getName() + ": \"" + output.getName() + "\" must be ordered after its reference stream");
    580             }
    581             if (rate.isRelative()) {
    582                 const Binding & ref = (port == Port::Input) ? getStreamInput(index) : getStreamOutput(index);
    583                 const ProcessingRate & refRate = ref.getRate();
    584                 if (LLVM_UNLIKELY(refRate.isRelative() || refRate.isFixed())) {
    585                     report_fatal_error(getName() + ": \"" + output.getName() + "\" cannot be relative to a fixed or relative rate stream");
    586                 }
    587             }
    588         }
    589         if (LLVM_UNLIKELY(output.isPrincipal())) {
    590             report_fatal_error(getName() + ": output stream \"" + output.getName() + "\" cannot be a principal stream");
    591         }
    592 
    593         bool hasAddOrRoundUpTo = false;
    594         bool hasDeferred = false;
    595         for (const Attribute & attr : output.getAttributes()) {
    596             switch (attr.getKind()) {
    597                 case Attribute::KindId::Add:
    598                 case Attribute::KindId::RoundUpTo:
    599                     hasAddOrRoundUpTo = true;
    600                     break;
    601                 case Attribute::KindId::Deferred:
    602                     hasDeferred = false;
    603                     break;
    604                 default: break;
    605             }
    606         }
    607 
    608         if (LLVM_UNLIKELY((hasAddOrRoundUpTo || hasDeferred) && !(rate.isFixed() || rate.isPopCount()))) {
    609             report_fatal_error(getName() + ": " + output.getName() + " cannot have an Add, RoundUpTo or Deferred attribute");
    610         }       
    611         if (LLVM_UNLIKELY(hasDeferred && hasAddOrRoundUpTo)) {
    612             report_fatal_error(getName() + ": cannot apply Add or RoundUpTo attributes to the Deferred output stream " + output.getName());
    613         }
    614     }
    615 }
    616 
    617 /** ------------------------------------------------------------------------------------------------------------- *
    618  * @brief requiresCopyBack
    619  ** ------------------------------------------------------------------------------------------------------------- */
    620 bool Kernel::requiresCopyBack(const Binding & binding) const {
     854        return true;
     855    } else if (rate.isRelative()) {
     856        return isCalculable(getStreamBinding(rate.getReference()));
     857    } else {
     858        return false;
     859    }
     860}
     861
     862/** ------------------------------------------------------------------------------------------------------------- *
     863 * @brief requiresOverflow
     864 ** ------------------------------------------------------------------------------------------------------------- */
     865bool Kernel::requiresOverflow(const Binding & binding) const {
    621866    const ProcessingRate & rate = binding.getRate();
    622     if (rate.isFixed() || binding.hasAttribute(Attribute::KindId::BlockSize)) {
     867    if (rate.isFixed() || binding.hasAttribute(AttrId::BlockSize)) {
    623868        return false;
    624869    } else if (rate.isRelative()) {
    625         return requiresCopyBack(getBinding(rate.getReference()));
    626     }
    627     return true;
    628 }
    629 
    630 /** ------------------------------------------------------------------------------------------------------------- *
    631  * @brief requiresLinearAccess
    632  ** ------------------------------------------------------------------------------------------------------------- */
    633 bool Kernel::requiresLinearAccess(const Binding & binding) const {
    634     return binding.hasAttribute(Attribute::KindId::RequiresLinearAccess);
    635 }
    636 
    637 /** ------------------------------------------------------------------------------------------------------------- *
    638  * @brief strideOffsetIsTriviallyCalculable
    639  ** ------------------------------------------------------------------------------------------------------------- */
    640 bool Kernel::strideOffsetIsTriviallyCalculable(const Binding & binding) const {
    641     if (requiresCopyBack(binding)) {
    642         const ProcessingRate & rate = binding.getRate();
    643         return rate.isPopCount() || rate.isNegatedPopCount();
    644     }
    645     return true;
    646 }
    647 
    648 /** ------------------------------------------------------------------------------------------------------------- *
    649  * @brief permitsNonLinearAccess
    650  ** ------------------------------------------------------------------------------------------------------------- */
    651 bool Kernel::permitsNonLinearAccess(const Binding & binding) const {
    652     if (LLVM_UNLIKELY(requiresLinearAccess(binding))) {
     870        return requiresOverflow(getStreamBinding(rate.getReference()));
     871    } else {
     872        return true;
     873    }
     874}
     875
     876/** ------------------------------------------------------------------------------------------------------------- *
     877 * @brief isUnknownRate
     878 ** ------------------------------------------------------------------------------------------------------------- */
     879bool Kernel::isUnknownRate(const Binding & binding) const {
     880    const ProcessingRate & rate = binding.getRate();
     881    if (rate.isUnknown()) {
     882        return true;
     883    } else if (rate.isRelative()) {
     884        return isUnknownRate(getStreamBinding(rate.getReference()));
     885    } else {
    653886        return false;
    654     } else if (LLVM_UNLIKELY(binding.hasAttribute(Attribute::KindId::PermitsNonLinearAccess))) {
    655         return true;
     887    }
     888}
     889
     890/** ------------------------------------------------------------------------------------------------------------- *
     891 * @brief initializeBindings
     892 ** ------------------------------------------------------------------------------------------------------------- */
     // Registers every binding of this kernel by name and fills in missing scalar
     // relationships.
     //   - Input and output scalars are added to the scalar map with their index; if a
     //     scalar has no relationship yet, one is created with driver.CreateScalar()
     //     using the binding's type.
     //   - Input and output stream sets must already have a relationship; a null one is
     //     reported through report_fatal_error(). They are added to the stream map with
     //     their port (Input/Output) and index.
     //   - Internal scalars are only added to the scalar map.
     // `driver` is used solely to create the missing Scalar relationships.
     893void Kernel::initializeBindings(BaseDriver & driver) {
     894
     // Input scalars: register by name; create a relationship if none was supplied.
     895    for (unsigned i = 0; i < mInputScalars.size(); i++) {
     896        Binding & input = mInputScalars[i];
     897        addScalarToMap(input.getName(), ScalarType::Input, i);
     898        if (input.getRelationship() == nullptr) {
     899            input.setRelationship(driver.CreateScalar(input.getType()));
     900        }
     901    }
     // Input stream sets: a relationship is mandatory, then register by name.
     902    for (unsigned i = 0; i < mInputStreamSets.size(); i++) {
     903        Binding & input = mInputStreamSets[i];
     904        if (LLVM_UNLIKELY(input.getRelationship() == nullptr)) {
     905            report_fatal_error(getName()+ "." + input.getName() + " must be set upon construction");
     906        }
     907        addStreamToMap(input.getName(), Port::Input, i);
     908    }
     // Output stream sets: same rule as the input stream sets.
     909    for (unsigned i = 0; i < mOutputStreamSets.size(); i++) {
     910        Binding & output = mOutputStreamSets[i];
     911        if (LLVM_UNLIKELY(output.getRelationship() == nullptr)) {
     912            report_fatal_error(getName()+ "." + output.getName() + " must be set upon construction");
     913        }
     914        addStreamToMap(output.getName(), Port::Output, i);
     915    }
     // Internal scalars: registered by name only; no relationship is touched here.
     916    for (unsigned i = 0; i < mInternalScalars.size(); i++) {
     917        const Binding & internal = mInternalScalars[i];
     918        addScalarToMap(internal.getName(), ScalarType::Internal, i);
     919    }
     // Output scalars: register by name; create a relationship if none was supplied.
     920    for (unsigned i = 0; i < mOutputScalars.size(); i++) {
     921        Binding & output = mOutputScalars[i];
     922        addScalarToMap(output.getName(), ScalarType::Output, i);
     923        if (output.getRelationship() == nullptr) {
     924            output.setRelationship(driver.CreateScalar(output.getType()));
     925        }
     926    }
     927}
     928
     929/** ------------------------------------------------------------------------------------------------------------- *
     930 * @brief setInputStreamSetAt
     931 ** ------------------------------------------------------------------------------------------------------------- */
     // Replaces the relationship of the i-th input stream set binding with `value`.
     // No validation of `i` is visible in this function.
     932void Kernel::setInputStreamSetAt(const unsigned i, StreamSet * const value) {
     933    mInputStreamSets[i].setRelationship(value);
     934}
     935
     936/** ------------------------------------------------------------------------------------------------------------- *
     937 * @brief setOutputStreamSetAt
     938 ** ------------------------------------------------------------------------------------------------------------- */
     // Replaces the relationship of the i-th output stream set binding with `value`.
     // No validation of `i` is visible in this function.
     939void Kernel::setOutputStreamSetAt(const unsigned i, StreamSet * const value) {
     940    mOutputStreamSets[i].setRelationship(value);
     941}
     942
     943/** ------------------------------------------------------------------------------------------------------------- *
     944 * @brief setInputScalarAt
     945 ** ------------------------------------------------------------------------------------------------------------- */
     // Replaces the relationship of the i-th input scalar binding with `value`.
     // No validation of `i` is visible in this function.
     946void Kernel::setInputScalarAt(const unsigned i, Scalar * const value) {
     947    mInputScalars[i].setRelationship(value);
     948}
     949
     950/** ------------------------------------------------------------------------------------------------------------- *
     951 * @brief setOutputScalarAt
     952 ** ------------------------------------------------------------------------------------------------------------- */
     // Replaces the relationship of the i-th output scalar binding with `value`.
     // No validation of `i` is visible in this function.
     953void Kernel::setOutputScalarAt(const unsigned i, Scalar * const value) {
     954    mOutputScalars[i].setRelationship(value);
     955}
     956
     957/** ------------------------------------------------------------------------------------------------------------- *
     958 * @brief getPopCountRateItemCount
     959 ** ------------------------------------------------------------------------------------------------------------- */
     960Value * Kernel::getPopCountRateItemCount(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate, Value * const strideIndex) {
     961    assert (rate.isPopCount() || rate.isNegatedPopCount());
     962    Port refPort;
     963    unsigned refIndex = 0;
     964    std::tie(refPort, refIndex) = getStreamPort(rate.getReference());
     965    assert (refPort == Port::Input);
     966    Value * array = nullptr;
     967    if (rate.isNegatedPopCount()) {
     968        array = mNegatedPopCountRateArray[refIndex];
    656969    } else {
    657         return strideOffsetIsTriviallyCalculable(binding);
    658     }
    659 }
    660 
    661 /** ------------------------------------------------------------------------------------------------------------- *
    662  * @brief mustClearOverflowPriorToCopyback
    663  ** ------------------------------------------------------------------------------------------------------------- */
    664 bool Kernel::mustClearOverflowPriorToCopyback(const Binding & binding) const {
    665     return requiresCopyBack(binding) && permitsNonLinearAccess(binding) && !strideOffsetIsTriviallyCalculable(binding);
    666 }
    667 
    668 /** ------------------------------------------------------------------------------------------------------------- *
    669  * @brief anyBindingRequiresLinearSpace
    670  ** ------------------------------------------------------------------------------------------------------------- */
    671 bool Kernel::anyBindingRequiresLinearSpace() const {
    672     for (const Binding & input : mStreamSetInputs) {
    673         if (requiresLinearAccess(input)) {
    674             return true;
    675         }
    676     }
    677     for (const Binding & output : mStreamSetOutputs) {
    678         if (!permitsNonLinearAccess(output)) {
    679             return true;
    680         }
    681     }
    682     return false;
     970        array = mPopCountRateArray[refIndex];
     971    }
     972    assert (array && "missing pop count array attribute");
     973    return b->CreateLoad(b->CreateGEP(array, strideIndex));
    683974}
    684975
     
    687978 ** ------------------------------------------------------------------------------------------------------------- */
    688979void SegmentOrientedKernel::generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
    689     mTreatUnsafeKernelOperationsAsErrors = false;
    690980    generateDoSegmentMethod(b);
    691981}
    692982
    693 static inline std::string annotateKernelNameWithDebugFlags(std::string && name) {
     983/** ------------------------------------------------------------------------------------------------------------- *
     984 * @brief annotateKernelNameWithDebugFlags
     985 ** ------------------------------------------------------------------------------------------------------------- */
     // Appends build-configuration suffixes to a kernel name and returns the result:
     //   "_EA"  when the EnableAsserts debug option is set,
     //   "_MP"  when the EnableMProtect debug option is set,
     //   "_O<n>" always, where <n> is codegen::OptLevel converted to int.
     // NOTE(review): `name` is a named rvalue-reference parameter, so `return name;`
     // copy-constructs the result under pre-C++20 rules; `return std::move(name);`
     // would avoid the copy -- confirm against the project's language standard.
     986inline std::string annotateKernelNameWithDebugFlags(std::string && name) {
    694987    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    695988        name += "_EA";
    696989    }
     990    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     991        name += "_MP";
     992    }
    697993    name += "_O" + std::to_string((int)codegen::OptLevel);
    698994    return name;
     995}
     996
     997/** ------------------------------------------------------------------------------------------------------------- *
     998 * @brief getDefaultFamilyName
     999 ** ------------------------------------------------------------------------------------------------------------- */
     // Builds a textual family name for this kernel from its stride, attributes and bindings.
     // Layout of the returned string, in order:
     //   "F" + getStride() + the output of AttributeSet::print(),
     //   then one ",IV(...)" entry per input scalar, ",IS(...)" per input stream set,
     //   ",OS(...)" per output stream set and ",OV(...)" per output scalar, where the
     //   parenthesised text is whatever Binding::print() writes for that binding.
     1000std::string Kernel::getDefaultFamilyName() const {
     1001    std::string tmp;
     1002    llvm::raw_string_ostream out(tmp);
     1003    out << "F";
     1004    out << getStride();
     1005    AttributeSet::print(out);
     1006    for (const Binding & input : mInputScalars) {
     1007        out << ",IV("; input.print(this, out); out << ')';
     1008    }
     1009    for (const Binding & input : mInputStreamSets) {
     1010        out << ",IS("; input.print(this, out); out << ')';
     1011    }
     1012    for (const Binding & output : mOutputStreamSets) {
     1013        out << ",OS("; output.print(this, out); out << ')';
     1014    }
     1015    for (const Binding & output : mOutputScalars) {
     1016        out << ",OV("; output.print(this, out); out << ')';
     1017    }
     // Flush the stream so that `tmp` holds everything written above before it is returned.
     1018    out.flush();
     1019    return tmp;
    6991020}
    7001021
     
    7031024               Bindings && stream_inputs,
    7041025               Bindings && stream_outputs,
    705                Bindings && scalar_parameters,
     1026               Bindings && scalar_inputs,
    7061027               Bindings && scalar_outputs,
    7071028               Bindings && internal_scalars)
    708 : KernelInterface(annotateKernelNameWithDebugFlags(std::move(kernelName))
    709                   , std::move(stream_inputs), std::move(stream_outputs)
    710                   , std::move(scalar_parameters), std::move(scalar_outputs)
    711                   , std::move(internal_scalars))
     1029: mIsGenerated(false)
     1030, mHandle(nullptr)
     1031, mModule(nullptr)
     1032, mKernelStateType(nullptr)
     1033, mInputStreamSets(std::move(stream_inputs))
     1034, mOutputStreamSets(std::move(stream_outputs))
     1035, mInputScalars(std::move(scalar_inputs))
     1036, mOutputScalars(std::move(scalar_outputs))
     1037, mInternalScalars( std::move(internal_scalars))
    7121038, mCurrentMethod(nullptr)
    7131039, mStride(0)
    714 , mTreatUnsafeKernelOperationsAsErrors(false)
    7151040, mIsFinal(nullptr)
    716 , mOutputScalarResult(nullptr)
    717 , mIsGenerated(false) {
    718 
    719 }
    720 
    721 Kernel::~Kernel() {
    722 
    723 }
     1041, mNumOfStrides(nullptr)
     1042, mKernelName(std::move(annotateKernelNameWithDebugFlags(std::move(kernelName)))) {
     1043
     1044}
     1045
     // Out-of-line destructor; the body is intentionally empty.
     1046Kernel::~Kernel() { }
    7241047
    7251048// CONSTRUCTOR
     
    7301053                                             Bindings && scalar_outputs,
    7311054                                             Bindings && internal_scalars)
    732 : Kernel(std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
    733 
    734 }
    735 
    736 
    737 }
     1055: Kernel(std::move(kernelName),
     1056         std::move(stream_inputs), std::move(stream_outputs),
     1057         std::move(scalar_parameters), std::move(scalar_outputs),
     1058         std::move(internal_scalars))  {
     1059
     1060}
     1061
     1062
     1063}
Note: See TracChangeset for help on using the changeset viewer.