source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 6187

Last change on this file since 6187 was 6187, checked in by nmedfort, 7 months ago

Potential bug fix for u32u8. CPUDriver only constructs the pass manager if uncached kernels exist.

File size: 47.9 KB
RevLine 
[4924]1/*
[5841]2 *  Copyright (c) 2018 International Characters.
[4924]3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
[5425]7#include <toolchain/toolchain.h>
[6184]8#include <toolchain/driver.h>
9#include <kernels/relationship.h>
[5297]10#include <kernels/streamset.h>
[6184]11#include <kernels/kernel_builder.h>
12#include <llvm/IR/CallingConv.h>
13#include <llvm/IR/DerivedTypes.h>
[5297]14#include <llvm/IR/Constants.h>
15#include <llvm/IR/Function.h>
16#include <llvm/IR/Instructions.h>
[5350]17#include <llvm/IR/MDBuilder.h>
[5267]18#include <llvm/IR/Module.h>
19#include <llvm/Support/raw_ostream.h>
[5841]20#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(4, 0, 0)
[5392]21#include <llvm/Bitcode/ReaderWriter.h>
[5732]22#else
23#include <llvm/Bitcode/BitcodeWriter.h>
24#endif
[5350]25#include <llvm/Transforms/Utils/Local.h>
[6184]26#include <llvm/Support/Debug.h>
27#include <boost/uuid/sha1.hpp>
28#include <llvm/Support/Format.h>
[5408]29#include <sstream>
[4924]30
[6184]31
[5435]32using namespace llvm;
[6184]33using namespace boost;
34using boost::container::flat_set;
[5287]35
[5435]36namespace kernel {
[5287]37
[6184]38using AttrId = Attribute::KindId;
39using RateValue = ProcessingRate::RateValue;
40using RateId = ProcessingRate::KindId;
41using StreamPort = Kernel::StreamSetPort;
42using Port = Kernel::Port;
43
44// TODO: make "namespaced" internal scalars that are automatically grouped into cache-aligned structs
45// within the kernel state to hide the complexity from the user?
46
47const static auto INIT_SUFFIX = "_Init";
48const static auto DO_SEGMENT_SUFFIX = "_DoSegment";
49const static auto TERMINATE_SUFFIX = "_Terminate";
50
[5706]51/** ------------------------------------------------------------------------------------------------------------- *
[6184]52 * @brief setInstance
[5706]53 ** ------------------------------------------------------------------------------------------------------------- */
[6184]54void  Kernel::setHandle(const std::unique_ptr<KernelBuilder> & b, Value * const handle) {
55    assert ("handle cannot be null!" && handle);
56    assert ("handle must be a pointer!" && handle->getType()->isPointerTy());
57    assert ("handle must be a kernel state object!" && (handle->getType()->getPointerElementType() == mKernelStateType));
58    #ifndef NDEBUG
59    const Function * const handleFunction = isa<Argument>(handle) ? cast<Argument>(handle)->getParent() : cast<Instruction>(handle)->getParent()->getParent();
60    const Function * const builderFunction = b->GetInsertBlock()->getParent();
61    assert ("handle is not from the current function." && (handleFunction == builderFunction));
62    #endif
63    mHandle = handle;
64}
65
66/** ------------------------------------------------------------------------------------------------------------- *
67 * @brief isLocalBuffer
68 ** ------------------------------------------------------------------------------------------------------------- */
69inline bool isLocalBuffer(const Binding & output) {
70    return output.getRate().isUnknown() || output.hasAttribute(AttrId::ManagedBuffer);
71}
72
73/** ------------------------------------------------------------------------------------------------------------- *
74 * @brief addBaseKernelProperties
75 *
76 * Base kernel properties are those that the pipeline requires access to and must be in a fixed memory location.
77 ** ------------------------------------------------------------------------------------------------------------- */
78void Kernel::addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & b) {
79
80    // Set the default kernel stride.
81    if (mStride == 0) {
82        mStride = b->getBitBlockWidth();
[4924]83    }
[6184]84
85    // TODO: if a stream has an Expandable or ManagedBuffer attribute or is produced at an Unknown rate,
86    // the pipeline ought to pass the stream as a DynamicBuffer. This will require some coordination between
87    // the pipeline and kernel to ensure both have a consistent view of the buffer and that if either expands,
88    // any other kernel that is (simultaneously) reading from the buffer is unaffected.
89
90    mStreamSetInputBuffers.clear();
91    const auto numOfInputStreams = mInputStreamSets.size();
92    mStreamSetInputBuffers.reserve(numOfInputStreams);
93    for (unsigned i = 0; i < numOfInputStreams; ++i) {
94        const Binding & input = mInputStreamSets[i];
95        mStreamSetInputBuffers.emplace_back(new ExternalBuffer(b, input.getType()));
[5283]96    }
[6184]97
98    mStreamSetOutputBuffers.clear();
99    const auto numOfOutputStreams = mOutputStreamSets.size();
100    mStreamSetOutputBuffers.reserve(numOfOutputStreams);
101    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
102        const Binding & output = mOutputStreamSets[i];
103        mStreamSetOutputBuffers.emplace_back(new ExternalBuffer(b, output.getType()));
104    }
105
106    IntegerType * const sizeTy = b->getSizeTy();
107    PointerType * const sizePtrPtrTy = sizeTy->getPointerTo()->getPointerTo();
108
109    addInternalScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
110    addInternalScalar(sizeTy, TERMINATION_SIGNAL);
111
112    // TODO: if we had a way of easily calculating the number of processed/produced items of the
113    // final stride of a non-deferred fixed rate stream, we could avoid storing the item counts.
114    for (unsigned i = 0; i < numOfInputStreams; ++i) {
115        const Binding & input = mInputStreamSets[i];
116        addInternalScalar(sizeTy, input.getName() + PROCESSED_ITEM_COUNT_SUFFIX);
117        if (LLVM_UNLIKELY(input.isDeferred())) {
118            addInternalScalar(sizeTy, input.getName() + NON_DEFERRED_ITEM_COUNT_SUFFIX);
119        }
120    }
121
122    // If an output is a managed buffer, we need to store both the buffer and a set of consumers.
123    Type * const consumerSetTy = StructType::get(b->getContext(), {sizeTy, sizePtrPtrTy})->getPointerTo();
124    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
125        const Binding & output = mOutputStreamSets[i];
126        addInternalScalar(sizeTy, output.getName() + PRODUCED_ITEM_COUNT_SUFFIX);
127        if (LLVM_UNLIKELY(output.isDeferred())) {
128            addInternalScalar(sizeTy, output.getName() + NON_DEFERRED_ITEM_COUNT_SUFFIX);
129        }
130        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
131            Type * const handleTy = mStreamSetOutputBuffers[i]->getHandleType(b);
132            addInternalScalar(handleTy, output.getName() + BUFFER_HANDLE_SUFFIX);
133            addInternalScalar(consumerSetTy, output.getName() + CONSUMER_SUFFIX);
134            addInternalScalar(sizeTy, output.getName() + CONSUMED_ITEM_COUNT_SUFFIX);
135        }
136    }
137
138    // We compile in a 64-bit CPU cycle counter into every kernel.   It will remain unused
139    // in normal execution, but when codegen::EnableCycleCounter is specified, pipelines
140    // will be able to add instrumentation to cached modules without recompilation.
141    addInternalScalar(b->getInt64Ty(), CYCLECOUNT_SCALAR);
142
[4924]143}
[4968]144
[5706]145/** ------------------------------------------------------------------------------------------------------------- *
[6184]146 * @brief addScalarToMap
[5706]147 ** ------------------------------------------------------------------------------------------------------------- */
[6187]148void Kernel::addScalarToMap(const llvm::StringRef name, const ScalarType scalarType, const unsigned index) {
149    const auto r = mScalarMap.insert(std::make_pair(name, ScalarField{scalarType, index}));
[6184]150    if (LLVM_UNLIKELY(!r.second)) {
151        const ScalarField & sf = r.first->second;
152        if (LLVM_UNLIKELY(sf.type != scalarType || sf.index != index)) {
153            report_fatal_error(getName() + " already contains scalar " + name);
154        }
[5283]155    }
156}
157
[5706]158/** ------------------------------------------------------------------------------------------------------------- *
[6184]159 * @brief addScalarToMap
[5706]160 ** ------------------------------------------------------------------------------------------------------------- */
[6187]161void Kernel::addStreamToMap(const llvm::StringRef name, const Port port, const unsigned index) {
162    const auto r = mStreamSetMap.insert(std::make_pair(name, std::make_pair(port, index)));
[6184]163    if (LLVM_UNLIKELY(!r.second)) {
164        const StreamPort & sf = r.first->second;
165        if (LLVM_UNLIKELY(sf.first != port || sf.second != index)) {
166            report_fatal_error(getName() + " already contains stream " + name);
167        }
168    }
169}
[5440]170
[6184]171/** ------------------------------------------------------------------------------------------------------------- *
172 * @brief addKernelDeclarations
173 ** ------------------------------------------------------------------------------------------------------------- */
174void Kernel::addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b) {
175    if (mKernelStateType == nullptr) {
176        throw std::runtime_error("Kernel state definition " + getName() + " has not been finalized.");
[5440]177    }
[6184]178    addInitializeDeclaration(b);
179    addDoSegmentDeclaration(b);
180    addFinalizeDeclaration(b);
181    linkExternalMethods(b);
182}
[5440]183
[6184]184/** ------------------------------------------------------------------------------------------------------------- *
185 * @brief addInitializeDeclaration
186 ** ------------------------------------------------------------------------------------------------------------- */
187void Kernel::addInitializeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
188
189    std::vector<Type *> params;
190    params.push_back(mKernelStateType->getPointerTo());
191    for (const Binding & binding : mInputScalars) {
192        params.push_back(binding.getType());
[5985]193    }
194
[6184]195    FunctionType * const initType = FunctionType::get(b->getVoidTy(), params, false);
196    Function * const initFunc = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, b->getModule());
197    initFunc->setCallingConv(CallingConv::C);
198    initFunc->setDoesNotThrow();
199    auto args = initFunc->arg_begin();
200    args->setName("self");
201    for (const Binding & binding : mInputScalars) {
202        (++args)->setName(binding.getName());
203    }
204
205    assert (std::next(args) == initFunc->arg_end());
206}
207
208/** ------------------------------------------------------------------------------------------------------------- *
209 * @brief callGenerateInitializeMethod
210 ** ------------------------------------------------------------------------------------------------------------- */
211void Kernel::callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
212    const Kernel * const storedKernel = b->getKernel();
213    b->setKernel(this);
214    Value * const storedHandle = getHandle();
215    mCurrentMethod = getInitFunction(b->getModule());
216    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
217    auto args = mCurrentMethod->arg_begin();
218    setHandle(b, &*args);
219    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
220        b->CreateMProtect(mHandle, CBuilder::Protect::WRITE);
221    }
222    b->CreateStore(ConstantAggregateZero::get(mKernelStateType), getHandle());
223    for (const auto & binding : mInputScalars) {
224        b->setScalarField(binding.getName(), &*(++args));
225    }
226
227    const auto numOfOutputs = mOutputStreamSets.size();
228    for (unsigned i = 0; i < numOfOutputs; i++) {
229        const Binding & output = mOutputStreamSets[i];
230        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
231            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
232            mStreamSetOutputBuffers[i]->setHandle(b, handle);
[5440]233        }
234    }
[6184]235    generateInitializeMethod(b);
236    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
237        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
238    }
239    b->CreateRetVoid();
240    b->setKernel(storedKernel);
241    mHandle = storedHandle;
242    mCurrentMethod = nullptr;
243}
[5440]244
[6184]245/** ------------------------------------------------------------------------------------------------------------- *
246 * @brief addDoSegmentDeclaration
247 ** ------------------------------------------------------------------------------------------------------------- */
248void Kernel::addDoSegmentDeclaration(const std::unique_ptr<KernelBuilder> & b) {
249
250    IntegerType * const sizeTy = b->getSizeTy();
251    PointerType * const sizePtrTy = sizeTy->getPointerTo();
252    Type * const voidTy = b->getVoidTy();
253
254    std::vector<Type *> params;
255    params.reserve(2 + mInputStreamSets.size() + mOutputStreamSets.size());
256    params.push_back(mKernelStateType->getPointerTo());  // self
257    params.push_back(sizeTy); // numOfStrides
258    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
259        Type * const bufferType = mStreamSetInputBuffers[i]->getType();
260        params.push_back(bufferType->getPointerTo()); // logical "base" input address
261        params.push_back(sizeTy);  // accessible input items (after non-deferred processed item count)
262        const Binding & input = mInputStreamSets[i];
263        unsigned numOfPopCountArrays = 0;
264        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
265            ++numOfPopCountArrays;
266        }
267        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
268            ++numOfPopCountArrays;
269        }
270        if (numOfPopCountArrays) {
271            params.insert(params.end(), numOfPopCountArrays, sizePtrTy); // popCountRef array (length is numOfStrides)
272        }
[5440]273    }
[6184]274    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
275        const Binding & output = mOutputStreamSets[i];
276        if (LLVM_LIKELY(!isLocalBuffer(output))) {
277            Type * const bufferType = mStreamSetOutputBuffers[i]->getType();
278            params.push_back(bufferType->getPointerTo()); // logical "base" output address
279            params.push_back(sizeTy); // writable output items (after non-deferred produced item count)
280        }
281    }
[5440]282
[6184]283    FunctionType * const doSegmentType = FunctionType::get(voidTy, params, false);
284    Function * const doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, b->getModule());
285    doSegment->setCallingConv(CallingConv::C);
286    doSegment->setDoesNotThrow();
287    auto args = doSegment->arg_begin();
288    args->setName("self");
289    (++args)->setName("numOfStrides");
290    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
291        const Binding & input = mInputStreamSets[i];
292        (++args)->setName(input.getName());
293        (++args)->setName(input.getName() + "_accessible");
294        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
295            (++args)->setName(input.getName() + "_popCountArray");
296        }
297        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
298            (++args)->setName(input.getName() + "_negatedPopCountArray");
299        }
[5985]300    }
[6184]301    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
302        const Binding & output = mOutputStreamSets[i];
303        if (LLVM_LIKELY(!isLocalBuffer(output))) {
304            (++args)->setName(output.getName());
305            (++args)->setName(output.getName() + "_writable");
306        }
307    }
308    assert (std::next(args) == doSegment->arg_end());
309}
[5985]310
[6184]311/** ------------------------------------------------------------------------------------------------------------- *
312 * @brief callGenerateKernelMethod
313 ** ------------------------------------------------------------------------------------------------------------- */
314void Kernel::callGenerateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
315
316    assert (mInputStreamSets.size() == mStreamSetInputBuffers.size());
317    assert (mOutputStreamSets.size() == mStreamSetOutputBuffers.size());
318
319    const Kernel * const storedKernel = b->getKernel();
320    b->setKernel(this);
321    Value * const storedHandle = getHandle();
322    mCurrentMethod = getDoSegmentFunction(b->getModule());
323    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
324    auto args = mCurrentMethod->arg_begin();
325    setHandle(b, &*(args++));
326    mNumOfStrides = &*(args++);
327    mIsFinal = b->CreateIsNull(mNumOfStrides);
328    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
329        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
330    }
331    // NOTE: the disadvantage of passing the stream pointers as a parameter is that it becomes more difficult
332    // to access a stream set from a LLVM function call. We could create a stream-set aware function creation
333    // and call system here but that is not an ideal way of handling this.
334
335    // TODO: use a graph to depict relations between binding? It would be better to first move to a model
336    // where inputs and outputs are contained in a single parameter vector.
337
338    const auto numOfInputs = getNumOfStreamInputs();
339    mAccessibleInputItems.resize(numOfInputs, nullptr);
340    mAvailableInputItems.resize(numOfInputs, nullptr);
341    mPopCountRateArray.resize(numOfInputs, nullptr);
342    mNegatedPopCountRateArray.resize(numOfInputs, nullptr);
343    for (unsigned i = 0; i < numOfInputs; i++) {
344        const Binding & input = mInputStreamSets[i];
345        assert (args != mCurrentMethod->arg_end());
346        Value * const addr = &*(args++);
347        auto & buffer = mStreamSetInputBuffers[i];
348        Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
349        buffer->setHandle(b, localHandle);
350        buffer->setBaseAddress(b.get(), addr);
351        assert (args != mCurrentMethod->arg_end());
352        Value * const accessible = &*(args++);
353        mAccessibleInputItems[i] = accessible;
354        Value * const processed = b->getNonDeferredProcessedItemCount(input);
355        Value * capacity = b->CreateAdd(processed, accessible);
356        mAvailableInputItems[i] = capacity;
357        if (input.hasLookahead()) {
358            capacity = b->CreateAdd(capacity, b->getSize(input.getLookahead()));
[5440]359        }
[6184]360        buffer->setCapacity(b.get(), capacity);
361        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
362            assert (args != mCurrentMethod->arg_end());
363            mPopCountRateArray[i] = &*(args++);
364        }
365        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
366            assert (args != mCurrentMethod->arg_end());
367            mNegatedPopCountRateArray[i] = &*(args++);
368        }
369    }
370
371    // set all of the output buffers
372    const auto numOfOutputs = getNumOfStreamOutputs();
373    mWritableOutputItems.resize(numOfOutputs, nullptr);
374    for (unsigned i = 0; i < numOfOutputs; i++) {
375        // If an output is a managed buffer, the address is stored within the state instead
376        // of being passed in through the function call.
377        auto & buffer = mStreamSetOutputBuffers[i];
378        const Binding & output = mOutputStreamSets[i];
379        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
380            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
381            buffer->setHandle(b, handle);
[5440]382        } else {
[6184]383            assert (args != mCurrentMethod->arg_end());
384            Value * const logicalBaseAddress = &*(args++);
385            Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
386            buffer->setHandle(b, localHandle);
387            buffer->setBaseAddress(b.get(), logicalBaseAddress);
388            assert (args != mCurrentMethod->arg_end());
389            Value * const writable = &*(args++);
390            mWritableOutputItems[i] = writable;
391            Value * const produced = b->getNonDeferredProducedItemCount(output);
392            Value * const capacity = b->CreateAdd(produced, writable);
393            buffer->setCapacity(b.get(), capacity);
[5440]394        }
395    }
[6184]396    assert (args == mCurrentMethod->arg_end());
[5440]397
[6184]398    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
399        Value * const terminated = b->getTerminationSignal();
400        b->CreateAssert(b->CreateNot(terminated), getName() + " was called after termination");
401    }
402
403    // Calculate and/or load the accessible and writable item counts. If they are unneeded,
404    // LLVM ought to recognize them as dead code and remove them.
405    generateKernelMethod(b); // must be overridden by the Kernel subtype   
406    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
407        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
408    }
409    b->CreateRetVoid();
410
411    // Clean up all of the constructed buffers.
412    b->setKernel(storedKernel);
413    mHandle = storedHandle;
414    mCurrentMethod = nullptr;
415    mIsFinal = nullptr;
416    mNumOfStrides = nullptr;
417    mAccessibleInputItems.clear();
418    mPopCountRateArray.clear();
419    mNegatedPopCountRateArray.clear();
[5446]420}
421
[5706]422/** ------------------------------------------------------------------------------------------------------------- *
[6184]423 * @brief addFinalizeDeclaration
424 ** ------------------------------------------------------------------------------------------------------------- */
425void Kernel::addFinalizeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
426    Type * resultType = nullptr;
427    if (mOutputScalars.empty()) {
428        resultType = b->getVoidTy();
429    } else {
430        const auto n = mOutputScalars.size();
431        Type * outputType[n];
432        for (unsigned i = 0; i < n; ++i) {
433            outputType[i] = mOutputScalars[i].getType();
434        }
435        if (n == 1) {
436            resultType = outputType[0];
437        } else {
438            resultType = StructType::get(b->getContext(), ArrayRef<Type *>(outputType, n));
439        }
440    }
441    PointerType * const selfType = mKernelStateType->getPointerTo();
442    FunctionType * const terminateType = FunctionType::get(resultType, {selfType}, false);
443    Function * const terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, b->getModule());
444    terminateFunc->setCallingConv(CallingConv::C);
445    terminateFunc->setDoesNotThrow();
446    auto args = terminateFunc->arg_begin();
447    args->setName("self");
448    assert (std::next(args) == terminateFunc->arg_end());
449}
450
451/** ------------------------------------------------------------------------------------------------------------- *
452 * @brief callGenerateFinalizeMethod
453 ** ------------------------------------------------------------------------------------------------------------- */
454void Kernel::callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
455
456    const Kernel * const storedKernel = b->getKernel();
457    b->setKernel(this);
458    mCurrentMethod = getTerminateFunction(b->getModule());
459    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
460    auto args = mCurrentMethod->arg_begin();
461    setHandle(b, &*(args++));
462    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
463        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
464    }
465    const auto numOfOutputs = mOutputStreamSets.size();
466    for (unsigned i = 0; i < numOfOutputs; i++) {
467        const Binding & output = mOutputStreamSets[i];
468        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
469            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
470            mStreamSetOutputBuffers[i]->setHandle(b, handle);
471        }
472    }
473
474    generateFinalizeMethod(b); // may be overridden by the Kernel subtype
475    const auto outputs = getFinalOutputScalars(b);
476    b->CreateFree(mHandle);
477    mHandle = nullptr;
478
479    if (outputs.empty()) {
480        b->CreateRetVoid();
481    } else {
482        const auto n = outputs.size();
483        if (n == 1) {
484            b->CreateRet(outputs[0]);
485        } else {
486            b->CreateAggregateRet(outputs.data(), n);
487        }
488    }
489
490    b->setKernel(storedKernel);
491    mCurrentMethod = nullptr;
492}
493
494/** ------------------------------------------------------------------------------------------------------------- *
495 * @brief callGenerateFinalizeMethod
496 ** ------------------------------------------------------------------------------------------------------------- */
497std::vector<Value *> Kernel::getFinalOutputScalars(const std::unique_ptr<KernelBuilder> & b) {
498    const auto n = mOutputScalars.size();
499    std::vector<Value *> outputs(n);
500    for (unsigned i = 0; i < n; ++i) {
501        outputs[i] = b->getScalarField(mOutputScalars[i].getName());
502    }
503    return outputs;
504}
505
506/** ------------------------------------------------------------------------------------------------------------- *
[5706]507 * @brief getCacheName
508 ** ------------------------------------------------------------------------------------------------------------- */
[5985]509std::string Kernel::getCacheName(const std::unique_ptr<KernelBuilder> & b) const {
[5630]510    std::stringstream cacheName;
[5985]511    cacheName << getName() << '_' << b->getBuilderUniqueName();
[5630]512    return cacheName.str();
[5440]513}
514
[5706]515/** ------------------------------------------------------------------------------------------------------------- *
516 * @brief setModule
517 ** ------------------------------------------------------------------------------------------------------------- */
[5630]518Module * Kernel::setModule(Module * const module) {
519    assert (mModule == nullptr || mModule == module);
520    assert (module != nullptr);
[5446]521    mModule = module;
522    return mModule;
523}
524
[5706]525/** ------------------------------------------------------------------------------------------------------------- *
526 * @brief makeModule
527 ** ------------------------------------------------------------------------------------------------------------- */
[6184]528Module * Kernel::makeModule(const std::unique_ptr<KernelBuilder> & b) {
529    Module * m = new Module(getCacheName(b), b->getContext());
530    m->setTargetTriple(b->getModule()->getTargetTriple());
531    m->setDataLayout(b->getModule()->getDataLayout());
[5743]532    return setModule(m);
[5630]533}
534
[5706]535
536/** ------------------------------------------------------------------------------------------------------------- *
[6184]537 * @brief getInitFunction
538 ** ------------------------------------------------------------------------------------------------------------- */
539Function * Kernel::getInitFunction(Module * const module) const {
540    const auto name = getName() + INIT_SUFFIX;
541    Function * f = module->getFunction(name);
542    if (LLVM_UNLIKELY(f == nullptr)) {
543        report_fatal_error("Cannot find " + name);
544    }
545    return f;
546}
547
548/** ------------------------------------------------------------------------------------------------------------- *
549 * @brief getDoSegmentFunction
550 ** ------------------------------------------------------------------------------------------------------------- */
551Function * Kernel::getDoSegmentFunction(Module * const module) const {
552    const auto name = getName() + DO_SEGMENT_SUFFIX;
553    Function * f = module->getFunction(name);
554    if (LLVM_UNLIKELY(f == nullptr)) {
555        report_fatal_error("Cannot find " + name);
556    }
557    return f;
558}
559
560/** ------------------------------------------------------------------------------------------------------------- *
561 * @brief getTerminateFunction
562 ** ------------------------------------------------------------------------------------------------------------- */
563Function * Kernel::getTerminateFunction(Module * const module) const {
564    const auto name = getName() + TERMINATE_SUFFIX;
565    Function * f = module->getFunction(name);
566    if (LLVM_UNLIKELY(f == nullptr)) {
567        report_fatal_error("Cannot find " + name);
568    }
569    return f;
570}
571
572/** ------------------------------------------------------------------------------------------------------------- *
[5706]573 * @brief prepareKernel
574 ** ------------------------------------------------------------------------------------------------------------- */
[5985]575void Kernel::prepareKernel(const std::unique_ptr<KernelBuilder> & b) {
[5246]576    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
[5646]577        report_fatal_error(getName() + ": cannot prepare kernel after kernel state finalized");
[5246]578    }
[5985]579    addBaseKernelProperties(b);
580    addInternalKernelProperties(b);
[5620]581    // NOTE: StructType::create always creates a new type even if an identical one exists.
[5630]582    if (LLVM_UNLIKELY(mModule == nullptr)) {
[5985]583        makeModule(b);
[5630]584    }
585    mKernelStateType = mModule->getTypeByName(getName());
[6184]586
587
[5620]588    if (LLVM_LIKELY(mKernelStateType == nullptr)) {
[6184]589        std::vector<llvm::Type *> fields;
590        fields.reserve(mInputScalars.size() + mOutputScalars.size() + mInternalScalars.size());
591        for (const Binding & scalar : mInputScalars) {
592            assert (scalar.getType());
593            fields.push_back(scalar.getType());
594        }
595        for (const Binding & scalar : mOutputScalars) {
596            assert (scalar.getType());
597            fields.push_back(scalar.getType());
598        }
599        for (const Binding & scalar : mInternalScalars) {
600            assert (scalar.getType());
601            fields.push_back(scalar.getType());
602        }
603        mKernelStateType = StructType::create(b->getContext(), fields, getName());       
[5755]604    }
[6184]605
606
607
608
609    assert (isa<StructType>(mKernelStateType));
[4970]610}
[5630]611
[5706]612/** ------------------------------------------------------------------------------------------------------------- *
[6184]613 * @brief addInternalScalar
[5706]614 ** ------------------------------------------------------------------------------------------------------------- */
[6187]615void Kernel::addInternalScalar(llvm::Type * type, const llvm::StringRef name) {
[6184]616    const auto index = mInternalScalars.size();
617    mInternalScalars.emplace_back(type, name);
618    addScalarToMap(name, ScalarType::Internal, index);
[5706]619}
620
621/** ------------------------------------------------------------------------------------------------------------- *
[6184]622 * @brief getScalarIndex
[5985]623 ** ------------------------------------------------------------------------------------------------------------- */
[6187]624unsigned Kernel::getScalarIndex(const llvm::StringRef fieldName) const {
[6184]625    const auto & field = getScalarField(fieldName);
626    assert (mKernelStateType);
627    unsigned index = field.index;
628    switch (field.type) {
629        case ScalarType::Internal:
630            index += mOutputScalars.size();
631        case ScalarType::Output:
632            index += mInputScalars.size();
633        case ScalarType::Input:
634            break;
[5985]635    }
[6184]636    assert (index < mKernelStateType->getStructNumElements());
637    return index;
[5985]638}
639
640/** ------------------------------------------------------------------------------------------------------------- *
[6184]641 * @brief prepareCachedKernel
[5706]642 ** ------------------------------------------------------------------------------------------------------------- */
[6184]643void Kernel::prepareCachedKernel(const std::unique_ptr<KernelBuilder> & b) {
644    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
645        report_fatal_error(getName() + ": cannot prepare kernel after kernel state finalized");
646    } 
647    addBaseKernelProperties(b);
648    mKernelStateType = getModule()->getTypeByName(getName());
649    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
650        report_fatal_error("Kernel definition for " + getName() + " was not found in the cache!");
[5630]651    }
[6184]652    assert (isa<StructType>(mKernelStateType));
[5630]653}
[4970]654
[5706]655/** ------------------------------------------------------------------------------------------------------------- *
656 * @brief makeSignature
657 *
658 * Default kernel signature: generate the IR and emit as byte code.
659 ** ------------------------------------------------------------------------------------------------------------- */
[6184]660std::string Kernel::makeSignature(const std::unique_ptr<KernelBuilder> & b) {
[5464]661    if (LLVM_UNLIKELY(hasSignature())) {
[6184]662        generateKernel(b);
[5865]663        std::string tmp;
664        raw_string_ostream signature(tmp);
665        WriteBitcodeToFile(getModule(), signature);
666        return signature.str();
[5464]667    } else {
668        return getModule()->getModuleIdentifier();
[5401]669    }
[5392]670}
671
[5706]672/** ------------------------------------------------------------------------------------------------------------- *
[6184]673 * @brief getStringHash
674 *
675 * Create a fixed length string hash of the given str
[5706]676 ** ------------------------------------------------------------------------------------------------------------- */
[6187]677std::string Kernel::getStringHash(const llvm::StringRef str) {
[6184]678
679    uint32_t digest[5]; // 160 bits in total
680    boost::uuids::detail::sha1 sha1;
[6187]681    sha1.process_bytes(str.data(), str.size());
[6184]682    sha1.get_digest(digest);
683
684    std::string buffer;
685    buffer.reserve((5 * 8) + 1);
686    raw_string_ostream out(buffer);
687    for (unsigned i = 0; i < 5; ++i) {
688        out << format_hex_no_prefix(digest[i], 8);
689    }
690    out.flush();
691
692    return buffer;
[5250]693}
[5246]694
[5706]695/** ------------------------------------------------------------------------------------------------------------- *
[6184]696 * @brief createInstance
[5706]697 ** ------------------------------------------------------------------------------------------------------------- */
[6184]698Value * Kernel::createInstance(const std::unique_ptr<KernelBuilder> & b) {
699    assert (mKernelStateType && "cannot create instance before calling prepareKernel() or prepareCachedKernel()");
700    Constant * const size = ConstantExpr::getSizeOf(mKernelStateType);
701    Value * handle = nullptr;
702    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
703        handle = b->CreateAlignedMalloc(size, b->getPageSize());
704        b->CreateMProtect(handle, size, CBuilder::Protect::READ);
705    } else {
706        handle = b->CreateAlignedMalloc(size, b->getCacheAlignment());
[5051]707    }
[6184]708//    mHandle = b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
709//    return mHandle;
710    return b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
[5051]711}
712
[5706]713/** ------------------------------------------------------------------------------------------------------------- *
[6184]714 * @brief initializeInstance
[5706]715 ** ------------------------------------------------------------------------------------------------------------- */
[6184]716void Kernel::initializeInstance(const std::unique_ptr<KernelBuilder> & b, std::vector<Value *> &args) {
717    assert (args.size() == getNumOfScalarInputs() + 1);
718    assert (args[0] && "cannot initialize before creation");
719    assert (args[0]->getType()->getPointerElementType() == mKernelStateType);
720    b->setKernel(this);
721    Function * const init = getInitFunction(b->getModule());
722    b->CreateCall(init, args);
[5411]723}
724
[6184]725/** ------------------------------------------------------------------------------------------------------------- *
726 * @brief generateKernel
727 ** ------------------------------------------------------------------------------------------------------------- */
728void Kernel::generateKernel(const std::unique_ptr<KernelBuilder> & b) {
729    if (LLVM_UNLIKELY(mIsGenerated)) return;
730    b->setKernel(this);
731    b->setModule(mModule);
732    addKernelDeclarations(b);
733    callGenerateInitializeMethod(b);
734    callGenerateKernelMethod(b);
735    callGenerateFinalizeMethod(b);
736    addAdditionalFunctions(b);
737    mIsGenerated = true;
738}
[5706]739
740/** ------------------------------------------------------------------------------------------------------------- *
[6184]741 * @brief finalizeInstance
[5706]742 ** ------------------------------------------------------------------------------------------------------------- */
[6184]743Value * Kernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & b) {
744    assert (mHandle && "was not set");
745    Value * result = b->CreateCall(getTerminateFunction(b->getModule()), { mHandle });
746    mHandle = nullptr;
747    if (mOutputScalars.empty()) {
748        assert (!result || result->getType()->isVoidTy());
749        result = nullptr;
[5418]750    }
[6184]751    return result;
752
[5418]753}
754
[5706]755/** ------------------------------------------------------------------------------------------------------------- *
[6184]756 * @brief getScalarField
[5706]757 ** ------------------------------------------------------------------------------------------------------------- */
[6187]758const Kernel::ScalarField & Kernel::getScalarField(const llvm::StringRef name) const {
[6184]759    assert (!mScalarMap.empty());
760    const auto f = mScalarMap.find(name);
761    if (LLVM_UNLIKELY(f == mScalarMap.end())) {
[5320]762        report_fatal_error(getName() + " does not contain scalar: " + name);
[5000]763    }
[5435]764    return f->second;
[4959]765}
[4924]766
[5706]767/** ------------------------------------------------------------------------------------------------------------- *
[6184]768 * @brief getInputScalarBinding
[5706]769 ** ------------------------------------------------------------------------------------------------------------- */
[6187]770Binding & Kernel::getInputScalarBinding(const llvm::StringRef name) {
[6184]771    const ScalarField & field = getScalarField(name);
772    if (LLVM_UNLIKELY(field.type != ScalarType::Input)) {
773        report_fatal_error(getName() + "." + name + "is not an input scalar");
[5246]774    }
[6184]775    return mInputScalars[field.index];
[5408]776}
[5320]777
[5706]778/** ------------------------------------------------------------------------------------------------------------- *
[6184]779 * @brief getOutputScalarBinding
[5706]780 ** ------------------------------------------------------------------------------------------------------------- */
[6187]781Binding & Kernel::getOutputScalarBinding(const llvm::StringRef name) {
[6184]782    const ScalarField & field = getScalarField(name);
783    if (LLVM_UNLIKELY(field.type != ScalarType::Output)) {
784        report_fatal_error(getName() + "." + name + "is not an output scalar");
[5408]785    }
[6184]786    return mOutputScalars[field.index];
[5133]787}
[5104]788
[5706]789/** ------------------------------------------------------------------------------------------------------------- *
790 * @brief getStreamPort
791 ** ------------------------------------------------------------------------------------------------------------- */
[6187]792Kernel::StreamSetPort Kernel::getStreamPort(const llvm::StringRef name) const {
[6184]793    const auto f = mStreamSetMap.find(name);
794    if (LLVM_UNLIKELY(f == mStreamSetMap.end())) {
795        assert (!mStreamSetMap.empty());
[5706]796        report_fatal_error(getName() + " does not contain stream set " + name);
797    }
798    return f->second;
799}
800
801/** ------------------------------------------------------------------------------------------------------------- *
[6184]802 * @brief getBinding
[5755]803 ** ------------------------------------------------------------------------------------------------------------- */
[6187]804const Binding & Kernel::getStreamBinding(const llvm::StringRef name) const {
[5755]805    Port port; unsigned index;
806    std::tie(port, index) = getStreamPort(name);
[6184]807    return (port == Port::Input) ? getInputStreamSetBinding(index) : getOutputStreamSetBinding(index);
[5755]808}
809
810/** ------------------------------------------------------------------------------------------------------------- *
[5757]811 * @brief getLowerBound
812 ** ------------------------------------------------------------------------------------------------------------- */
[6184]813RateValue Kernel::getLowerBound(const Binding & binding) const {
814    const ProcessingRate & rate = binding.getRate();
815    if (rate.hasReference()) {
816        return rate.getLowerBound() * getLowerBound(getStreamBinding(rate.getReference()));
817    } else {
[5757]818        return rate.getLowerBound();
819    }
820}
821
822/** ------------------------------------------------------------------------------------------------------------- *
823 * @brief getUpperBound
824 ** ------------------------------------------------------------------------------------------------------------- */
[6184]825RateValue Kernel::getUpperBound(const Binding & binding) const {
826    const ProcessingRate & rate = binding.getRate();
827    if (rate.hasReference()) {
828        return rate.getUpperBound() * getUpperBound(getStreamBinding(rate.getReference()));
829    } else {
[5757]830        return rate.getUpperBound();
831    }
832}
833
834/** ------------------------------------------------------------------------------------------------------------- *
[6184]835 * @brief isCountable
[5755]836 ** ------------------------------------------------------------------------------------------------------------- */
[6184]837bool Kernel::isCountable(const Binding & binding) const {
838    const ProcessingRate & rate = binding.getRate();
839    if (rate.isFixed() || rate.isPopCount() || rate.isNegatedPopCount()) {
840        return true;
841//    } else if (rate.isRelative()) {
842//        return isCountable(getStreamBinding(rate.getReference()));
843    } else {
844        return false;
[5831]845    }
[6184]846}
[5706]847
[6184]848/** ------------------------------------------------------------------------------------------------------------- *
849 * @brief isCalculable
850 ** ------------------------------------------------------------------------------------------------------------- */
851bool Kernel::isCalculable(const Binding & binding) const {
852    const ProcessingRate & rate = binding.getRate();
853    if (rate.isFixed() || rate.isBounded()) {
854        return true;
855    } else if (rate.isRelative()) {
856        return isCalculable(getStreamBinding(rate.getReference()));
857    } else {
858        return false;
[5706]859    }
[5985]860}
[5706]861
[5985]862/** ------------------------------------------------------------------------------------------------------------- *
[6184]863 * @brief requiresOverflow
[5755]864 ** ------------------------------------------------------------------------------------------------------------- */
[6184]865bool Kernel::requiresOverflow(const Binding & binding) const {
[5985]866    const ProcessingRate & rate = binding.getRate();
[6184]867    if (rate.isFixed() || binding.hasAttribute(AttrId::BlockSize)) {
[5985]868        return false;
[5755]869    } else if (rate.isRelative()) {
[6184]870        return requiresOverflow(getStreamBinding(rate.getReference()));
871    } else {
872        return true;
[5755]873    }
874}
[5706]875
[5755]876/** ------------------------------------------------------------------------------------------------------------- *
[6184]877 * @brief isUnknownRate
[5755]878 ** ------------------------------------------------------------------------------------------------------------- */
[6184]879bool Kernel::isUnknownRate(const Binding & binding) const {
880    const ProcessingRate & rate = binding.getRate();
881    if (rate.isUnknown()) {
882        return true;
883    } else if (rate.isRelative()) {
884        return isUnknownRate(getStreamBinding(rate.getReference()));
885    } else {
886        return false;
887    }
[5755]888}
[5706]889
[5755]890/** ------------------------------------------------------------------------------------------------------------- *
[6184]891 * @brief initializeBindings
[5755]892 ** ------------------------------------------------------------------------------------------------------------- */
[6184]893void Kernel::initializeBindings(BaseDriver & driver) {
894
895    for (unsigned i = 0; i < mInputScalars.size(); i++) {
896        Binding & input = mInputScalars[i];
897        addScalarToMap(input.getName(), ScalarType::Input, i);
898        if (input.getRelationship() == nullptr) {
899            input.setRelationship(driver.CreateScalar(input.getType()));
900        }
[5755]901    }
[6184]902    for (unsigned i = 0; i < mInputStreamSets.size(); i++) {
903        Binding & input = mInputStreamSets[i];
904        if (LLVM_UNLIKELY(input.getRelationship() == nullptr)) {
905            report_fatal_error(getName()+ "." + input.getName() + " must be set upon construction");
906        }
907        addStreamToMap(input.getName(), Port::Input, i);
908    }
909    for (unsigned i = 0; i < mOutputStreamSets.size(); i++) {
910        Binding & output = mOutputStreamSets[i];
911        if (LLVM_UNLIKELY(output.getRelationship() == nullptr)) {
912            report_fatal_error(getName()+ "." + output.getName() + " must be set upon construction");
913        }
914        addStreamToMap(output.getName(), Port::Output, i);
915    }
916    for (unsigned i = 0; i < mInternalScalars.size(); i++) {
917        const Binding & internal = mInternalScalars[i];
918        addScalarToMap(internal.getName(), ScalarType::Internal, i);
919    }
920    for (unsigned i = 0; i < mOutputScalars.size(); i++) {
921        Binding & output = mOutputScalars[i];
922        addScalarToMap(output.getName(), ScalarType::Output, i);
923        if (output.getRelationship() == nullptr) {
924            output.setRelationship(driver.CreateScalar(output.getType()));
925        }
926    }
[5706]927}
928
[5755]929/** ------------------------------------------------------------------------------------------------------------- *
[6184]930 * @brief setInputStreamSetAt
[5755]931 ** ------------------------------------------------------------------------------------------------------------- */
[6184]932void Kernel::setInputStreamSetAt(const unsigned i, StreamSet * const value) {
933    mInputStreamSets[i].setRelationship(value);
[5285]934}
935
[5755]936/** ------------------------------------------------------------------------------------------------------------- *
[6184]937 * @brief setOutputStreamSetAt
[5755]938 ** ------------------------------------------------------------------------------------------------------------- */
[6184]939void Kernel::setOutputStreamSetAt(const unsigned i, StreamSet * const value) {
940    mOutputStreamSets[i].setRelationship(value);
[5755]941}
[5292]942
[5755]943/** ------------------------------------------------------------------------------------------------------------- *
[6184]944 * @brief setInputScalarAt
[5755]945 ** ------------------------------------------------------------------------------------------------------------- */
[6184]946void Kernel::setInputScalarAt(const unsigned i, Scalar * const value) {
947    mInputScalars[i].setRelationship(value);
948}
949
950/** ------------------------------------------------------------------------------------------------------------- *
951 * @brief setOutputScalarAt
952 ** ------------------------------------------------------------------------------------------------------------- */
953void Kernel::setOutputScalarAt(const unsigned i, Scalar * const value) {
954    mOutputScalars[i].setRelationship(value);
955}
956
957/** ------------------------------------------------------------------------------------------------------------- *
958 * @brief getPopCountRateItemCount
959 ** ------------------------------------------------------------------------------------------------------------- */
960Value * Kernel::getPopCountRateItemCount(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate, Value * const strideIndex) {
961    assert (rate.isPopCount() || rate.isNegatedPopCount());
962    Port refPort;
963    unsigned refIndex = 0;
964    std::tie(refPort, refIndex) = getStreamPort(rate.getReference());
965    assert (refPort == Port::Input);
966    Value * array = nullptr;
967    if (rate.isNegatedPopCount()) {
968        array = mNegatedPopCountRateArray[refIndex];
969    } else {
970        array = mPopCountRateArray[refIndex];
[5347]971    }
[6184]972    assert (array && "missing pop count array attribute");
973    return b->CreateLoad(b->CreateGEP(array, strideIndex));
[5285]974}
975
[5755]976/** ------------------------------------------------------------------------------------------------------------- *
[5985]977 * @brief generateKernelMethod
[5755]978 ** ------------------------------------------------------------------------------------------------------------- */
[5985]979void SegmentOrientedKernel::generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
980    generateDoSegmentMethod(b);
[5292]981}
982
[6184]983/** ------------------------------------------------------------------------------------------------------------- *
984 * @brief annotateKernelNameWithDebugFlags
985 ** ------------------------------------------------------------------------------------------------------------- */
986inline std::string annotateKernelNameWithDebugFlags(std::string && name) {
[5755]987    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
[5454]988        name += "_EA";
989    }
[6184]990    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
991        name += "_MP";
992    }
[5620]993    name += "_O" + std::to_string((int)codegen::OptLevel);
[5454]994    return name;
995}
996
[6184]997/** ------------------------------------------------------------------------------------------------------------- *
998 * @brief getDefaultFamilyName
999 ** ------------------------------------------------------------------------------------------------------------- */
1000std::string Kernel::getDefaultFamilyName() const {
1001    std::string tmp;
1002    llvm::raw_string_ostream out(tmp);
1003    out << "F";
1004    out << getStride();
1005    AttributeSet::print(out);
1006    for (const Binding & input : mInputScalars) {
1007        out << ",IV("; input.print(this, out); out << ')';
1008    }
1009    for (const Binding & input : mInputStreamSets) {
1010        out << ",IS("; input.print(this, out); out << ')';
1011    }
1012    for (const Binding & output : mOutputStreamSets) {
1013        out << ",OS("; output.print(this, out); out << ')';
1014    }
1015    for (const Binding & output : mOutputScalars) {
1016        out << ",OV("; output.print(this, out); out << ')';
1017    }
1018    out.flush();
1019    return tmp;
1020}
1021
[5285]1022// CONSTRUCTOR
[5435]1023Kernel::Kernel(std::string && kernelName,
[5755]1024               Bindings && stream_inputs,
1025               Bindings && stream_outputs,
[6184]1026               Bindings && scalar_inputs,
[5755]1027               Bindings && scalar_outputs,
1028               Bindings && internal_scalars)
[6184]1029: mIsGenerated(false)
1030, mHandle(nullptr)
1031, mModule(nullptr)
1032, mKernelStateType(nullptr)
1033, mInputStreamSets(std::move(stream_inputs))
1034, mOutputStreamSets(std::move(stream_outputs))
1035, mInputScalars(std::move(scalar_inputs))
1036, mOutputScalars(std::move(scalar_outputs))
1037, mInternalScalars( std::move(internal_scalars))
[5350]1038, mCurrentMethod(nullptr)
[5706]1039, mStride(0)
[5418]1040, mIsFinal(nullptr)
[6184]1041, mNumOfStrides(nullptr)
[6186]1042, mKernelName(annotateKernelNameWithDebugFlags(std::move(kernelName))) {
[5283]1043
1044}
1045
[6184]1046Kernel::~Kernel() { }
[5283]1047
[5285]1048// CONSTRUCTOR
[5435]1049SegmentOrientedKernel::SegmentOrientedKernel(std::string && kernelName,
[5755]1050                                             Bindings && stream_inputs,
1051                                             Bindings && stream_outputs,
1052                                             Bindings && scalar_parameters,
1053                                             Bindings && scalar_outputs,
1054                                             Bindings && internal_scalars)
[6184]1055: Kernel(std::move(kernelName),
1056         std::move(stream_inputs), std::move(stream_outputs),
1057         std::move(scalar_parameters), std::move(scalar_outputs),
1058         std::move(internal_scalars))  {
[5706]1059
[5283]1060}
[5615]1061
1062
[5435]1063}
Note: See TracBrowser for help on using the repository browser.