source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 6288

Last change on this file since 6288 was 6288, checked in by cameron, 7 months ago

Repeat of prior check in

File size: 60.5 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain/toolchain.h>
8#include <toolchain/driver.h>
9#include <kernels/relationship.h>
10#include <kernels/streamset.h>
11#include <kernels/kernel_builder.h>
12#include <llvm/IR/CallingConv.h>
13#include <llvm/IR/DerivedTypes.h>
14#include <llvm/IR/Constants.h>
15#include <llvm/IR/Function.h>
16#include <llvm/IR/Instructions.h>
17#include <llvm/IR/MDBuilder.h>
18#include <llvm/IR/Module.h>
19#include <llvm/Support/raw_ostream.h>
20#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(4, 0, 0)
21#include <llvm/Bitcode/ReaderWriter.h>
22#else
23#include <llvm/Bitcode/BitcodeWriter.h>
24#endif
25#include <llvm/Transforms/Utils/Local.h>
26#include <llvm/Support/Debug.h>
27#include <boost/uuid/sha1.hpp>
28#include <llvm/Support/Format.h>
29#include <sstream>
30#include <llvm/Support/raw_ostream.h>
31
32using namespace llvm;
33using namespace boost;
34
35namespace kernel {
36
37using AttrId = Attribute::KindId;
38using RateValue = ProcessingRate::RateValue;
39using RateId = ProcessingRate::KindId;
40using StreamPort = Kernel::StreamSetPort;
41using Port = Kernel::Port;
42
43// TODO: make "namespaced" internal scalars that are automatically grouped into cache-aligned structs
44// within the kernel state to hide the complexity from the user?
45
46const static auto INIT_SUFFIX = "_Init";
47const static auto DO_SEGMENT_SUFFIX = "_DoSegment";
48const static auto TERMINATE_SUFFIX = "_Terminate";
49
50/** ------------------------------------------------------------------------------------------------------------- *
51 * @brief setInstance
52 ** ------------------------------------------------------------------------------------------------------------- */
53void Kernel::setHandle(const std::unique_ptr<KernelBuilder> & b, Value * const handle) {
54    assert ("handle cannot be null!" && handle);
55    assert ("handle must be a pointer!" && handle->getType()->isPointerTy());
56    assert ("handle must be a kernel state object!" && (handle->getType()->getPointerElementType() == mKernelStateType));
57    #ifndef NDEBUG
58    const Function * const handleFunction = isa<Argument>(handle) ? cast<Argument>(handle)->getParent() : cast<Instruction>(handle)->getParent()->getParent();
59    const Function * const builderFunction = b->GetInsertBlock()->getParent();
60    assert ("handle is not from the current function." && (handleFunction == builderFunction));
61    #endif
62    mHandle = handle;
63}
64
65/** ------------------------------------------------------------------------------------------------------------- *
66 * @brief isLocalBuffer
67 ** ------------------------------------------------------------------------------------------------------------- */
68inline bool isLocalBuffer(const Binding & output) {
69    return output.getRate().isUnknown() || output.hasAttribute(AttrId::ManagedBuffer);
70}
71
/** ------------------------------------------------------------------------------------------------------------- *
 * @brief reset
 *
 * Replaces the contents of vec with exactly n null entries.
 ** ------------------------------------------------------------------------------------------------------------- */
template <typename Vec>
inline void reset(Vec & vec, const unsigned n) {
    vec.assign(n, nullptr);
}
80
81/** ------------------------------------------------------------------------------------------------------------- *
82 * @brief addBaseKernelProperties
83 *
84 * Base kernel properties are those that the pipeline requires access to and must be in a fixed memory location.
85 ** ------------------------------------------------------------------------------------------------------------- */
86void Kernel::addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & b) {
87
88    // TODO: if a stream has an Expandable or ManagedBuffer attribute or is produced at an Unknown rate,
89    // the pipeline ought to pass the stream as a DynamicBuffer. This will require some coordination between
90    // the pipeline and kernel to ensure both have a consistent view of the buffer and that if either expands,
91    // any other kernel that is (simultaneously) reading from the buffer is unaffected.
92
93    mStreamSetInputBuffers.clear();
94    const auto numOfInputStreams = mInputStreamSets.size();
95    mStreamSetInputBuffers.reserve(numOfInputStreams);
96    for (unsigned i = 0; i < numOfInputStreams; ++i) {
97        const Binding & input = mInputStreamSets[i];
98        mStreamSetInputBuffers.emplace_back(new ExternalBuffer(b, input.getType()));
99    }
100
101    mStreamSetOutputBuffers.clear();
102    const auto numOfOutputStreams = mOutputStreamSets.size();
103    mStreamSetOutputBuffers.reserve(numOfOutputStreams);
104    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
105        const Binding & output = mOutputStreamSets[i];
106        mStreamSetOutputBuffers.emplace_back(new ExternalBuffer(b, output.getType()));
107    }
108
109    // If an output is a managed buffer, store its handle.
110    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
111        const Binding & output = mOutputStreamSets[i];
112        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
113            Type * const handleTy = mStreamSetOutputBuffers[i]->getHandleType(b);
114            addInternalScalar(handleTy, output.getName() + BUFFER_HANDLE_SUFFIX);
115        }
116    }
117
118}
119
120/** ------------------------------------------------------------------------------------------------------------- *
121 * @brief addScalarToMap
122 ** ------------------------------------------------------------------------------------------------------------- */
123void Kernel::addScalarToMap(const StringRef name, const ScalarType scalarType, const unsigned index) {
124    const auto r = mScalarMap.insert(std::make_pair(name, ScalarField{scalarType, index}));
125    if (LLVM_UNLIKELY(!r.second)) {
126        const ScalarField & sf = r.first->second;
127        if (LLVM_UNLIKELY(sf.Type != scalarType || sf.Index != index)) {
128            report_fatal_error(getName() + " already contains scalar " + name);
129        }
130    }
131}
132
133/** ------------------------------------------------------------------------------------------------------------- *
134 * @brief addScalarToMap
135 ** ------------------------------------------------------------------------------------------------------------- */
136void Kernel::addStreamToMap(const StringRef name, const Port port, const unsigned index) {
137    const auto r = mStreamSetMap.insert(std::make_pair(name, std::make_pair(port, index)));
138    if (LLVM_UNLIKELY(!r.second)) {
139        const StreamPort & sf = r.first->second;
140        if (LLVM_UNLIKELY(sf.first != port || sf.second != index)) {
141            report_fatal_error(getName() + " already contains stream " + name);
142        }
143    }
144}
145
146/** ------------------------------------------------------------------------------------------------------------- *
147 * @brief addKernelDeclarations
148 ** ------------------------------------------------------------------------------------------------------------- */
149void Kernel::addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b) {
150    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
151        llvm_unreachable("Kernel state must be constructed prior to calling addKernelDeclarations");
152    }
153    addInitializeDeclaration(b);
154    addDoSegmentDeclaration(b);
155    addFinalizeDeclaration(b);
156    linkExternalMethods(b);
157}
158
159/** ------------------------------------------------------------------------------------------------------------- *
160 * @brief generateKernel
161 ** ------------------------------------------------------------------------------------------------------------- */
162void Kernel::generateKernel(const std::unique_ptr<KernelBuilder> & b) {
163    if (LLVM_UNLIKELY(mIsGenerated)) return;
164    b->setKernel(this);
165    b->setModule(mModule);
166    addKernelDeclarations(b);
167    callGenerateInitializeMethod(b);
168    callGenerateDoSegmentMethod(b);
169    callGenerateFinalizeMethod(b);
170    addAdditionalFunctions(b);
171    mIsGenerated = true;
172}
173
174/** ------------------------------------------------------------------------------------------------------------- *
175 * @brief addInitializeDeclaration
176 ** ------------------------------------------------------------------------------------------------------------- */
177inline void Kernel::addInitializeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
178
179    std::vector<Type *> params;
180    if (LLVM_LIKELY(isStateful())) {
181        params.push_back(mKernelStateType->getPointerTo());
182    }
183    for (const Binding & binding : mInputScalars) {
184        params.push_back(binding.getType());
185    }
186
187    FunctionType * const initType = FunctionType::get(b->getInt1Ty(), params, false);
188    Function * const initFunc = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, b->getModule());
189    initFunc->setCallingConv(CallingConv::C);
190    initFunc->setDoesNotThrow();
191    auto args = initFunc->arg_begin();
192    if (LLVM_LIKELY(isStateful())) {
193        (args++)->setName("handle");
194    }
195    for (const Binding & binding : mInputScalars) {
196        (args++)->setName(binding.getName());
197    }
198
199    assert (args == initFunc->arg_end());
200}
201
202/** ------------------------------------------------------------------------------------------------------------- *
203 * @brief callGenerateInitializeMethod
204 ** ------------------------------------------------------------------------------------------------------------- */
205inline void Kernel::callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
206    const Kernel * const storedKernel = b->getKernel();
207    b->setKernel(this);
208    Value * const storedHandle = getHandle();
209    mCurrentMethod = getInitFunction(b->getModule());
210    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
211    auto args = mCurrentMethod->arg_begin();
212    if (LLVM_LIKELY(isStateful())) {
213        setHandle(b, &*(args++));
214    }
215    if (LLVM_LIKELY(isStateful())) {
216        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
217            b->CreateMProtect(mHandle, CBuilder::Protect::WRITE);
218        }
219        b->CreateStore(ConstantAggregateZero::get(mKernelStateType), mHandle);
220    }
221    for (const auto & binding : mInputScalars) {
222        b->setScalarField(binding.getName(), &*(args++));
223    }
224    const auto numOfOutputs = mOutputStreamSets.size();
225    for (unsigned i = 0; i < numOfOutputs; i++) {
226        const Binding & output = mOutputStreamSets[i];
227        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
228            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
229            mStreamSetOutputBuffers[i]->setHandle(b, handle);
230        }
231    }
232    // any kernel can set termination on initialization
233    mTerminationSignalPtr = b->CreateAlloca(b->getInt1Ty(), nullptr, "terminationSignal");
234    b->CreateStore(b->getFalse(), mTerminationSignalPtr);
235    initializeLocalScalarValues(b);
236    generateInitializeMethod(b);
237    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect) && isStateful())) {
238        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
239    }
240    b->CreateRet(b->CreateLoad(mTerminationSignalPtr));
241    mTerminationSignalPtr = nullptr;
242
243    b->setKernel(storedKernel);
244    mHandle = storedHandle;
245    mCurrentMethod = nullptr;
246}
247
248/** ------------------------------------------------------------------------------------------------------------- *
249 * @brief isParamAddressable
250 ** ------------------------------------------------------------------------------------------------------------- */
251inline bool isParamAddressable(const Binding & binding) {
252    if (binding.isDeferred()) {
253        return true;
254    }
255    const ProcessingRate & rate = binding.getRate();
256    return (rate.isBounded() || rate.isUnknown());
257}
258
259/** ------------------------------------------------------------------------------------------------------------- *
260 * @brief isParamConstant
261 ** ------------------------------------------------------------------------------------------------------------- */
262inline bool isParamConstant(const Binding & binding) {
263    assert (!binding.isDeferred());
264    const ProcessingRate & rate = binding.getRate();
265    return rate.isFixed() || rate.isPopCount() || rate.isNegatedPopCount();
266}
267
268/** ------------------------------------------------------------------------------------------------------------- *
269 * @brief hasParam
270 ** ------------------------------------------------------------------------------------------------------------- */
271inline bool hasParam(const Binding & binding) {
272    return !binding.getRate().isRelative();
273}
274
275/** ------------------------------------------------------------------------------------------------------------- *
276 * @brief addDoSegmentDeclaration
277 ** ------------------------------------------------------------------------------------------------------------- */
278inline void Kernel::addDoSegmentDeclaration(const std::unique_ptr<KernelBuilder> & b) {
279
280    Type * const retTy = canSetTerminateSignal() ? b->getInt1Ty() : b->getVoidTy();
281    FunctionType * const doSegmentType = FunctionType::get(retTy, getDoSegmentFields(b), false);
282    Function * const doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, b->getModule());
283    doSegment->setCallingConv(CallingConv::C);
284    doSegment->setDoesNotThrow();
285    auto args = doSegment->arg_begin();
286    if (LLVM_LIKELY(isStateful())) {
287        (args++)->setName("handle");
288    }
289    (args++)->setName("numOfStrides");
290    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
291        const Binding & input = mInputStreamSets[i];
292        (args++)->setName(input.getName());
293        if (LLVM_LIKELY(hasParam(input))) {
294            (args++)->setName(input.getName() + "_processed");
295        }
296        (args++)->setName(input.getName() + "_accessible");
297        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
298            (args++)->setName(input.getName() + "_popCountArray");
299        }
300        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
301            (args++)->setName(input.getName() + "_negatedPopCountArray");
302        }
303    }
304    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
305        const Binding & output = mOutputStreamSets[i];
306        if (LLVM_LIKELY(!isLocalBuffer(output))) {
307            (args++)->setName(output.getName());
308        }
309        if (LLVM_LIKELY(hasParam(output))) {
310            (args++)->setName(output.getName() + "_produced");
311        }
312        if (LLVM_LIKELY(isLocalBuffer(output))) {
313            (args++)->setName(output.getName() + "_consumed");
314        } else {
315            (args++)->setName(output.getName() + "_writable");
316        }
317    }
318    assert (args == doSegment->arg_end());
319}
320
321/** ------------------------------------------------------------------------------------------------------------- *
322 * @brief getDoSegmentFields
323 ** ------------------------------------------------------------------------------------------------------------- */
324std::vector<Type *> Kernel::getDoSegmentFields(const std::unique_ptr<KernelBuilder> & b) const {
325
326    IntegerType * const sizeTy = b->getSizeTy();
327    PointerType * const sizePtrTy = sizeTy->getPointerTo();
328
329    std::vector<Type *> fields;
330    fields.reserve(2 + mInputStreamSets.size() + mOutputStreamSets.size());
331    if (LLVM_LIKELY(isStateful())) {
332        fields.push_back(mKernelStateType->getPointerTo());  // handle
333    }
334    fields.push_back(sizeTy); // numOfStrides
335    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
336        Type * const bufferType = mStreamSetInputBuffers[i]->getType();
337        // logical base input address
338        fields.push_back(bufferType->getPointerTo());
339        // processed input items
340        const Binding & input = mInputStreamSets[i];
341        if (isParamAddressable(input)) {
342            fields.push_back(sizePtrTy); // updatable
343        }  else if (isParamConstant(input)) {
344            fields.push_back(sizeTy);  // constant
345        }
346        // accessible input items (after non-deferred processed item count)
347        fields.push_back(sizeTy);
348        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
349            fields.push_back(sizePtrTy);
350        }
351        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
352            fields.push_back(sizePtrTy);
353        }
354    }
355
356    const auto canTerminate = canSetTerminateSignal();
357
358    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
359        const Binding & output = mOutputStreamSets[i];
360        // logical base output address
361        if (LLVM_LIKELY(!isLocalBuffer(output))) {
362            Type * const bufferType = mStreamSetOutputBuffers[i]->getType();
363            fields.push_back(bufferType->getPointerTo());
364        }
365        // produced output items
366        if (canTerminate || isParamAddressable(output)) {
367            fields.push_back(sizePtrTy); // updatable
368        } else if (isParamConstant(output)) {
369            fields.push_back(sizeTy); // constant
370        }
371        // If this is a local buffer, the next param is its consumed item count;
372        // otherwise it'll hold its writable output items.
373        fields.push_back(sizeTy);
374    }
375
376    return fields;
377}
378
379/** ------------------------------------------------------------------------------------------------------------- *
380 * @brief callGenerateDoSegmentMethod
381 ** ------------------------------------------------------------------------------------------------------------- */
382inline void Kernel::callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) {
383
384    assert (mInputStreamSets.size() == mStreamSetInputBuffers.size());
385    assert (mOutputStreamSets.size() == mStreamSetOutputBuffers.size());
386
387    const Kernel * const storedKernel = b->getKernel();
388    b->setKernel(this);
389    Value * const storedHandle = getHandle();
390    mCurrentMethod = getDoSegmentFunction(b->getModule());
391    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
392
393    std::vector<Value *> args;
394    args.reserve(mCurrentMethod->arg_size());
395    for (Argument & arg : mCurrentMethod->getArgumentList()) {
396        args.push_back(&arg);
397    }
398    setDoSegmentProperties(b, args);
399
400    generateKernelMethod(b);
401
402    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
403        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
404    }
405
406    const auto numOfInputs = getNumOfStreamInputs();
407
408    for (unsigned i = 0; i < numOfInputs; i++) {
409        if (mUpdatableProcessedInputItemPtr[i]) {
410            Value * const items = b->CreateLoad(mProcessedInputItemPtr[i]);
411            b->CreateStore(items, mUpdatableProcessedInputItemPtr[i]);
412        }
413    }
414
415    const auto numOfOutputs = getNumOfStreamOutputs();
416
417    for (unsigned i = 0; i < numOfOutputs; i++) {
418        if (mUpdatableProducedOutputItemPtr[i]) {
419            Value * const items = b->CreateLoad(mProducedOutputItemPtr[i]);
420            b->CreateStore(items, mUpdatableProducedOutputItemPtr[i]);
421        }
422    }
423
424    // return the termination signal (if one exists)
425    if (mTerminationSignalPtr) {
426        b->CreateRet(b->CreateLoad(mTerminationSignalPtr));
427        mTerminationSignalPtr = nullptr;
428    } else {
429        b->CreateRetVoid();
430    }
431
432    // Clean up all of the constructed buffers.
433    b->setKernel(storedKernel);
434    mHandle = storedHandle;
435    mCurrentMethod = nullptr;
436    mIsFinal = nullptr;
437    mNumOfStrides = nullptr;
438}
439
440/** ------------------------------------------------------------------------------------------------------------- *
441 * @brief setDoSegmentProperties
442 ** ------------------------------------------------------------------------------------------------------------- */
443void Kernel::setDoSegmentProperties(const std::unique_ptr<KernelBuilder> & b, const std::vector<Value *> & args) {
444
445    initializeLocalScalarValues(b);
446
447    auto arg = args.begin();
448    if (LLVM_LIKELY(isStateful())) {
449        setHandle(b, *arg++);
450    }
451
452    mNumOfStrides = *arg++;
453    mIsFinal = b->CreateIsNull(mNumOfStrides);
454    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
455        b->CreateMProtect(mHandle, CBuilder::Protect::WRITE);
456    }
457
458    // NOTE: the disadvantage of passing the stream pointers as a parameter is that it becomes more difficult
459    // to access a stream set from a LLVM function call. We could create a stream-set aware function creation
460    // and call system here but that is not an ideal way of handling this.
461
462    const auto numOfInputs = getNumOfStreamInputs();
463
464    reset(mProcessedInputItemPtr, numOfInputs);
465    reset(mAccessibleInputItems, numOfInputs);
466    reset(mAvailableInputItems, numOfInputs);
467    reset(mPopCountRateArray, numOfInputs);
468    reset(mNegatedPopCountRateArray, numOfInputs);
469    reset(mUpdatableProcessedInputItemPtr, numOfInputs);
470
471    IntegerType * const sizeTy = b->getSizeTy();
472
473    for (unsigned i = 0; i < numOfInputs; i++) {
474        /// ----------------------------------------------------
475        /// logical buffer base address
476        /// ----------------------------------------------------
477        const Binding & input = mInputStreamSets[i];
478        assert (arg != args.end());
479        Value * const addr = *arg++;
480        auto & buffer = mStreamSetInputBuffers[i];
481        Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
482        buffer->setHandle(b, localHandle);
483        buffer->setBaseAddress(b.get(), addr);
484        /// ----------------------------------------------------
485        /// processed item count
486        /// ----------------------------------------------------
487
488        // NOTE: we create a redundant alloca to store the input param so that
489        // Mem2Reg can convert it into a PHINode if the item count is updated in
490        // a loop; otherwise, it will be discarded in favor of the param itself.
491
492        Value * processed = nullptr;
493        if (isParamAddressable(input)) {
494            assert (arg != args.end());
495            mUpdatableProcessedInputItemPtr[i] = *arg++;
496            processed = b->CreateLoad(mUpdatableProcessedInputItemPtr[i]);
497        } else if (LLVM_LIKELY(isParamConstant(input))) {
498            assert (arg != args.end());
499            processed = *arg++;
500        } else { // isRelative
501            const ProcessingRate & rate = input.getRate();
502            Port port; unsigned index;
503            std::tie(port, index) = getStreamPort(rate.getReference());
504            assert (port == Port::Input && index < i);
505            assert (mProcessedInputItemPtr[index]);
506            Value * const ref = b->CreateLoad(mProcessedInputItemPtr[index]);
507            processed = b->CreateMul2(ref, rate.getRate());
508        }
509        AllocaInst * const processedItems = b->CreateAlloca(sizeTy);
510        b->CreateStore(processed, processedItems);
511        mProcessedInputItemPtr[i] = processedItems;
512        /// ----------------------------------------------------
513        /// accessible item count
514        /// ----------------------------------------------------
515        assert (arg != args.end());
516        Value * const accessible = *arg++;
517        mAccessibleInputItems[i] = accessible;
518        Value * capacity = b->CreateAdd(processed, accessible);
519        mAvailableInputItems[i] = capacity;
520        if (input.hasLookahead()) {
521            capacity = b->CreateAdd(capacity, b->getSize(input.getLookahead()));
522        }
523        buffer->setCapacity(b.get(), capacity);
524
525        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
526            assert (arg != args.end());
527            mPopCountRateArray[i] = *arg++;
528        }
529
530        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
531            assert (arg != args.end());
532            mNegatedPopCountRateArray[i] = *arg++;
533        }
534    }
535
536    // set all of the output buffers
537    const auto numOfOutputs = getNumOfStreamOutputs();
538    reset(mProducedOutputItemPtr, numOfOutputs);
539    reset(mWritableOutputItems, numOfOutputs);
540    reset(mConsumedOutputItems, numOfOutputs);
541    reset(mUpdatableProducedOutputItemPtr, numOfOutputs);
542
543    const auto canTerminate = canSetTerminateSignal();
544
545    for (unsigned i = 0; i < numOfOutputs; i++) {
546        /// ----------------------------------------------------
547        /// logical buffer base address
548        /// ----------------------------------------------------
549
550        auto & buffer = mStreamSetOutputBuffers[i];
551        const Binding & output = mOutputStreamSets[i];
552        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
553            // If an output is a managed buffer, the address is stored within the state instead
554            // of being passed in through the function call.
555            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
556            buffer->setHandle(b, handle);
557        } else {
558            assert (arg != args.end());
559            Value * const logicalBaseAddress = *arg++;
560            Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
561            buffer->setHandle(b, localHandle);
562            buffer->setBaseAddress(b.get(), logicalBaseAddress);
563        }
564        /// ----------------------------------------------------
565        /// produced item count
566        /// ----------------------------------------------------
567        Value * produced = nullptr;
568        if (LLVM_LIKELY(canTerminate || isParamAddressable(output))) {
569            assert (arg != args.end());
570            mUpdatableProducedOutputItemPtr[i] = *arg++;
571            produced = b->CreateLoad(mUpdatableProducedOutputItemPtr[i]);
572        } else if (LLVM_LIKELY(isParamConstant(output))) {
573            assert (arg != args.end());
574            produced = *arg++;
575        } else { // isRelative
576
577            // For now, if something is produced at a relative rate to another stream in a kernel that
578            // may terminate, its final item count is inherited from its reference stream and cannot
579            // be set independently. Should they be independent at early termination?
580
581            const ProcessingRate & rate = output.getRate();
582            Port port; unsigned index;
583            std::tie(port, index) = getStreamPort(rate.getReference());
584            assert (port == Port::Input || (port == Port::Output && index < i));
585            const auto & items = (port == Port::Input) ? mProcessedInputItemPtr : mProducedOutputItemPtr;
586            Value * const ref = b->CreateLoad(items[index]);
587            produced = b->CreateMul2(ref, rate.getRate());
588        }
589        AllocaInst * const producedItems = b->CreateAlloca(sizeTy);
590        b->CreateStore(produced, producedItems);
591        mProducedOutputItemPtr[i] = producedItems;
592        /// ----------------------------------------------------
593        /// consumed or writable item count
594        /// ----------------------------------------------------
595        Value * const items = *arg++;
596        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
597            mConsumedOutputItems[i] = items;
598        } else {
599            mWritableOutputItems[i] = items;
600            Value * const capacity = b->CreateAdd(produced, items);
601            buffer->setCapacity(b.get(), capacity);
602        }
603    }
604    assert (arg == args.end());
605
606    // initialize the termination signal if this kernel can set it
607    mTerminationSignalPtr = nullptr;
608    if (canTerminate) {
609        mTerminationSignalPtr = b->CreateAlloca(b->getInt1Ty(), nullptr, "terminationSignal");
610        b->CreateStore(b->getFalse(), mTerminationSignalPtr);
611    }
612
613}
614
615/** ------------------------------------------------------------------------------------------------------------- *
616 * @brief getDoSegmentProperties
617 *
618 * Reverse of the setDoSegmentProperties operation; used by the PipelineKernel when constructing internal threads
619 * to simplify passing of the state data.
620 ** ------------------------------------------------------------------------------------------------------------- */
621std::vector<Value *> Kernel::getDoSegmentProperties(const std::unique_ptr<KernelBuilder> & b) const {
622
623    std::vector<Value *> props;
624    if (LLVM_LIKELY(isStateful())) {
625        props.push_back(mHandle);
626    }
627    props.push_back(mNumOfStrides);
628
629    const auto numOfInputs = getNumOfStreamInputs();
630    for (unsigned i = 0; i < numOfInputs; i++) {
631        /// ----------------------------------------------------
632        /// logical buffer base address
633        /// ----------------------------------------------------
634        const auto & buffer = mStreamSetInputBuffers[i];
635        props.push_back(buffer->getBaseAddress(b.get()));
636        /// ----------------------------------------------------
637        /// processed item count
638        /// ----------------------------------------------------
639        const Binding & input = mInputStreamSets[i];
640        if (isParamAddressable(input)) {
641            props.push_back(mProcessedInputItemPtr[i]);
642        } else if (LLVM_LIKELY(isParamConstant(input))) {
643            props.push_back(b->CreateLoad(mProcessedInputItemPtr[i]));
644        }
645        /// ----------------------------------------------------
646        /// accessible item count
647        /// ----------------------------------------------------
648        props.push_back(mAccessibleInputItems[i]);
649        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
650            props.push_back(mPopCountRateArray[i]);
651        }
652        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
653            props.push_back(mNegatedPopCountRateArray[i]);
654        }
655    }
656
657    // set all of the output buffers
658    const auto numOfOutputs = getNumOfStreamOutputs();
659    const auto canTerminate = canSetTerminateSignal();
660
661    for (unsigned i = 0; i < numOfOutputs; i++) {
662        /// ----------------------------------------------------
663        /// logical buffer base address
664        /// ----------------------------------------------------
665        const auto & buffer = mStreamSetOutputBuffers[i];
666        const Binding & output = mOutputStreamSets[i];
667        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
668            // If an output is a managed buffer, the address is stored within the state instead
669            // of being passed in through the function call.
670            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
671            props.push_back(handle);
672        } else {
673            props.push_back(buffer->getBaseAddress(b.get()));
674        }
675        /// ----------------------------------------------------
676        /// produced item count
677        /// ----------------------------------------------------
678        if (LLVM_LIKELY(canTerminate || isParamAddressable(output))) {
679            props.push_back(mProducedOutputItemPtr[i]);
680        } else if (LLVM_LIKELY(isParamConstant(output))) {
681            props.push_back(b->CreateLoad(mProducedOutputItemPtr[i]));
682        }
683        /// ----------------------------------------------------
684        /// consumed or writable item count
685        /// ----------------------------------------------------
686        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
687            props.push_back(mConsumedOutputItems[i]);
688        } else {
689            props.push_back(mWritableOutputItems[i]);
690        }
691    }
692
693    return props;
694}
695
696/** ------------------------------------------------------------------------------------------------------------- *
697 * @brief addFinalizeDeclaration
698 ** ------------------------------------------------------------------------------------------------------------- */
699inline void Kernel::addFinalizeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
700    Type * resultType = nullptr;
701    if (mOutputScalars.empty()) {
702        resultType = b->getVoidTy();
703    } else {
704        const auto n = mOutputScalars.size();
705        Type * outputType[n];
706        for (unsigned i = 0; i < n; ++i) {
707            outputType[i] = mOutputScalars[i].getType();
708        }
709        if (n == 1) {
710            resultType = outputType[0];
711        } else {
712            resultType = StructType::get(b->getContext(), ArrayRef<Type *>(outputType, n));
713        }
714    }
715    std::vector<Type *> params;
716    if (LLVM_LIKELY(isStateful())) {
717        params.push_back(mKernelStateType->getPointerTo());
718    }
719    FunctionType * const terminateType = FunctionType::get(resultType, params, false);
720    Function * const terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, b->getModule());
721    terminateFunc->setCallingConv(CallingConv::C);
722    terminateFunc->setDoesNotThrow();
723    auto args = terminateFunc->arg_begin();
724    if (LLVM_LIKELY(isStateful())) {
725        (args++)->setName("handle");
726    }
727    assert (args == terminateFunc->arg_end());
728}
729
730/** ------------------------------------------------------------------------------------------------------------- *
731 * @brief callGenerateFinalizeMethod
732 ** ------------------------------------------------------------------------------------------------------------- */
733inline void Kernel::callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
734
735    const Kernel * const storedKernel = b->getKernel();
736    b->setKernel(this);
737    mCurrentMethod = getTerminateFunction(b->getModule());
738    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
739    if (LLVM_LIKELY(isStateful())) {
740        auto args = mCurrentMethod->arg_begin();
741        setHandle(b, &*(args++));
742        assert (args == mCurrentMethod->arg_end());
743    }
744    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
745        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
746    }
747    const auto numOfOutputs = mOutputStreamSets.size();
748    for (unsigned i = 0; i < numOfOutputs; i++) {
749        const Binding & output = mOutputStreamSets[i];
750        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
751            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
752            mStreamSetOutputBuffers[i]->setHandle(b, handle);
753        }
754    }
755    initializeLocalScalarValues(b);
756    generateFinalizeMethod(b); // may be overridden by the Kernel subtype
757    const auto outputs = getFinalOutputScalars(b);
758    if (LLVM_LIKELY(isStateful())) {
759        b->CreateFree(mHandle);
760    }
761    mHandle = nullptr;
762    if (outputs.empty()) {
763        b->CreateRetVoid();
764    } else {
765        const auto n = outputs.size();
766        if (n == 1) {
767            b->CreateRet(outputs[0]);
768        } else {
769            b->CreateAggregateRet(outputs.data(), n);
770        }
771    }
772
773    b->setKernel(storedKernel);
774    mCurrentMethod = nullptr;
775}
776
777/** ------------------------------------------------------------------------------------------------------------- *
778 * @brief callGenerateFinalizeMethod
779 ** ------------------------------------------------------------------------------------------------------------- */
780std::vector<Value *> Kernel::getFinalOutputScalars(const std::unique_ptr<KernelBuilder> & b) {
781    const auto n = mOutputScalars.size();
782    std::vector<Value *> outputs(n);
783    for (unsigned i = 0; i < n; ++i) {
784        outputs[i] = b->getScalarField(mOutputScalars[i].getName());
785    }
786    return outputs;
787}
788
789/** ------------------------------------------------------------------------------------------------------------- *
790 * @brief getCacheName
791 ** ------------------------------------------------------------------------------------------------------------- */
792std::string Kernel::getCacheName(const std::unique_ptr<KernelBuilder> & b) const {
793    std::stringstream cacheName;
794    cacheName << getName() << '_' << b->getBuilderUniqueName();
795    return cacheName.str();
796}
797
798/** ------------------------------------------------------------------------------------------------------------- *
799 * @brief setModule
800 ** ------------------------------------------------------------------------------------------------------------- */
801Module * Kernel::setModule(Module * const module) {
802    assert (mModule == nullptr || mModule == module);
803    assert (module != nullptr);
804    mModule = module;
805    return mModule;
806}
807
808/** ------------------------------------------------------------------------------------------------------------- *
809 * @brief makeModule
810 ** ------------------------------------------------------------------------------------------------------------- */
811Module * Kernel::makeModule(const std::unique_ptr<KernelBuilder> & b) {
812    Module * m = new Module(getCacheName(b), b->getContext());
813    m->setTargetTriple(b->getModule()->getTargetTriple());
814    m->setDataLayout(b->getModule()->getDataLayout());
815    return setModule(m);
816}
817
818
819/** ------------------------------------------------------------------------------------------------------------- *
820 * @brief getInitFunction
821 ** ------------------------------------------------------------------------------------------------------------- */
822Function * Kernel::getInitFunction(Module * const module) const {
823    const auto name = getName() + INIT_SUFFIX;
824    Function * f = module->getFunction(name);
825    if (LLVM_UNLIKELY(f == nullptr)) {
826        llvm_unreachable("cannot find Initialize function");
827    }
828    return f;
829}
830
831/** ------------------------------------------------------------------------------------------------------------- *
832 * @brief getDoSegmentFunction
833 ** ------------------------------------------------------------------------------------------------------------- */
834Function * Kernel::getDoSegmentFunction(Module * const module) const {
835    const auto name = getName() + DO_SEGMENT_SUFFIX;
836    Function * f = module->getFunction(name);
837    if (LLVM_UNLIKELY(f == nullptr)) {
838        llvm_unreachable("cannot find DoSegment function");
839    }
840    return f;
841}
842
843/** ------------------------------------------------------------------------------------------------------------- *
844 * @brief getTerminateFunction
845 ** ------------------------------------------------------------------------------------------------------------- */
846Function * Kernel::getTerminateFunction(Module * const module) const {
847    const auto name = getName() + TERMINATE_SUFFIX;
848    Function * f = module->getFunction(name);
849    if (LLVM_UNLIKELY(f == nullptr)) {
850        llvm_unreachable("cannot find Terminate function");
851    }
852    return f;
853}
854
855/** ------------------------------------------------------------------------------------------------------------- *
856 * @brief isStateful
857 ** ------------------------------------------------------------------------------------------------------------- */
858LLVM_READNONE bool Kernel::isStateful() const {
859    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
860        llvm_unreachable("kernel state must be constructed prior to calling isStateful");
861    }
862    return !mKernelStateType->isEmptyTy();
863}
864
865
866/** ------------------------------------------------------------------------------------------------------------- *
867 * @brief prepareKernel
868 ** ------------------------------------------------------------------------------------------------------------- */
869void Kernel::prepareKernel(const std::unique_ptr<KernelBuilder> & b) {
870    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
871        llvm_unreachable("Cannot call prepareKernel after constructing kernel state type");
872    }
873    if (LLVM_UNLIKELY(mStride == 0)) {
874        report_fatal_error(getName() + ": stride cannot be 0");
875    }
876    addBaseKernelProperties(b);
877    addInternalKernelProperties(b);
878    // NOTE: StructType::create always creates a new type even if an identical one exists.
879    if (LLVM_UNLIKELY(mModule == nullptr)) {
880        makeModule(b);
881    }
882    mKernelStateType = mModule->getTypeByName(getName());
883    if (LLVM_LIKELY(mKernelStateType == nullptr)) {
884        std::vector<Type *> fields;
885        fields.reserve(mInputScalars.size() + mOutputScalars.size() + mInternalScalars.size());
886        for (const Binding & scalar : mInputScalars) {
887            assert (scalar.getType());
888            fields.push_back(scalar.getType());
889        }
890        for (const Binding & scalar : mOutputScalars) {
891            assert (scalar.getType());
892            fields.push_back(scalar.getType());
893        }
894        for (const Binding & scalar : mInternalScalars) {
895            assert (scalar.getType());
896            fields.push_back(scalar.getType());
897        }
898        mKernelStateType = StructType::create(b->getContext(), fields, getName());
899    }
900    assert (isa<StructType>(mKernelStateType));
901}
902
903/** ------------------------------------------------------------------------------------------------------------- *
904 * @brief addInternalScalar
905 ** ------------------------------------------------------------------------------------------------------------- */
906void Kernel::addInternalScalar(Type * type, const StringRef name) {
907    const auto index = mInternalScalars.size();
908    mInternalScalars.emplace_back(type, name);
909    addScalarToMap(name, ScalarType::Internal, index);
910}
911
912/** ------------------------------------------------------------------------------------------------------------- *
913 * @brief addLocalScalar
914 ** ------------------------------------------------------------------------------------------------------------- */
915void Kernel::addLocalScalar(Type * type, const StringRef name) {
916    const auto index = mLocalScalars.size();
917    mLocalScalars.emplace_back(type, name);
918    addScalarToMap(name, ScalarType::Local, index);
919}
920
921/** ------------------------------------------------------------------------------------------------------------- *
922 * @brief prepareCachedKernel
923 ** ------------------------------------------------------------------------------------------------------------- */
924void Kernel::prepareCachedKernel(const std::unique_ptr<KernelBuilder> & b) {
925    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
926        llvm_unreachable("Cannot call prepareCachedKernel after constructing kernel state type");
927    }
928    addBaseKernelProperties(b);
929    mKernelStateType = getModule()->getTypeByName(getName());
930    // If we have a stateless object, the type would be optimized out of the
931    // cached IR. Consequently, we create a dummy "empty struct" to simplify
932    // the assumptions of the other Kernel functions.
933    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
934        mKernelStateType = StructType::get(b->getContext());
935    }
936    assert (isa<StructType>(mKernelStateType));
937}
938
939/** ------------------------------------------------------------------------------------------------------------- *
940 * @brief makeSignature
941 *
942 * Default kernel signature: generate the IR and emit as byte code.
943 ** ------------------------------------------------------------------------------------------------------------- */
944std::string Kernel::makeSignature(const std::unique_ptr<KernelBuilder> & b) {
945    if (LLVM_UNLIKELY(hasSignature())) {
946        generateKernel(b);
947        std::string tmp;
948        raw_string_ostream signature(tmp);
949        WriteBitcodeToFile(getModule(), signature);
950        return signature.str();
951    } else {
952        return getModule()->getModuleIdentifier();
953    }
954}
955
956/** ------------------------------------------------------------------------------------------------------------- *
957 * @brief getStringHash
958 *
959 * Create a fixed length string hash of the given str
960 ** ------------------------------------------------------------------------------------------------------------- */
961std::string Kernel::getStringHash(const StringRef str) {
962
963    uint32_t digest[5]; // 160 bits in total
964    boost::uuids::detail::sha1 sha1;
965    sha1.process_bytes(str.data(), str.size());
966    sha1.get_digest(digest);
967
968    std::string buffer;
969    buffer.reserve((5 * 8) + 1);
970    raw_string_ostream out(buffer);
971    for (unsigned i = 0; i < 5; ++i) {
972        out << format_hex_no_prefix(digest[i], 8);
973    }
974    out.flush();
975
976    return buffer;
977}
978
979/** ------------------------------------------------------------------------------------------------------------- *
980 * @brief createInstance
981 ** ------------------------------------------------------------------------------------------------------------- */
982Value * Kernel::createInstance(const std::unique_ptr<KernelBuilder> & b) {
983    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
984        llvm_unreachable("Kernel state must be constructed prior to calling createInstance");
985    }
986    if (LLVM_LIKELY(isStateful())) {
987        Constant * const size = ConstantExpr::getSizeOf(mKernelStateType);
988        Value * handle = nullptr;
989        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
990            handle = b->CreateAlignedMalloc(size, b->getPageSize());
991            b->CreateMProtect(handle, size, CBuilder::Protect::READ);
992        } else {
993            handle = b->CreateAlignedMalloc(size, b->getCacheAlignment());
994        }
995        return b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
996    }
997    llvm_unreachable("createInstance should not be called on stateless kernels");
998    return nullptr;
999}
1000
1001/** ------------------------------------------------------------------------------------------------------------- *
1002 * @brief initializeInstance
1003 ** ------------------------------------------------------------------------------------------------------------- */
1004void Kernel::initializeInstance(const std::unique_ptr<KernelBuilder> & b, std::vector<Value *> &args) {
1005    assert (args.size() == getNumOfScalarInputs() + 1);
1006    assert (args[0] && "cannot initialize before creation");
1007    assert (args[0]->getType()->getPointerElementType() == mKernelStateType);
1008    b->setKernel(this);
1009    Function * const init = getInitFunction(b->getModule());
1010    b->CreateCall(init, args);
1011}
1012
1013/** ------------------------------------------------------------------------------------------------------------- *
1014 * @brief finalizeInstance
1015 ** ------------------------------------------------------------------------------------------------------------- */
1016Value * Kernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & b) {
1017    Value * result = nullptr;
1018    Function * const termFunc = getTerminateFunction(b->getModule());
1019    if (LLVM_LIKELY(isStateful())) {
1020        result = b->CreateCall(termFunc, { mHandle });
1021    } else {
1022        result = b->CreateCall(termFunc);
1023    }
1024    mHandle = nullptr;
1025    if (mOutputScalars.empty()) {
1026        assert (!result || result->getType()->isVoidTy());
1027        result = nullptr;
1028    }
1029    return result;
1030
1031}
1032
1033/** ------------------------------------------------------------------------------------------------------------- *
1034 * @brief getScalarField
1035 ** ------------------------------------------------------------------------------------------------------------- */
1036const Kernel::ScalarField & Kernel::getScalarField(const StringRef name) const {
1037    assert (!mScalarMap.empty());
1038    const auto f = mScalarMap.find(name);
1039    if (LLVM_UNLIKELY(f == mScalarMap.end())) {
1040        assert (!"could not find scalar!");
1041        report_fatal_error(getName() + " does not contain scalar: " + name);
1042    }
1043    return f->second;
1044}
1045
1046/** ------------------------------------------------------------------------------------------------------------- *
1047 * @brief getScalarFieldPtr
1048 ** ------------------------------------------------------------------------------------------------------------- */
1049Value * Kernel::getScalarFieldPtr(KernelBuilder & b, const StringRef name) const {
1050    const auto & field = getScalarField(name);
1051    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
1052        llvm_unreachable("Kernel state must be constructed prior to calling getScalarFieldPtr");
1053    }
1054    unsigned index = field.Index;
1055    switch (field.Type) {
1056        case ScalarType::Local:
1057            return mLocalScalarPtr[index];
1058        case ScalarType::Internal:
1059            index += mOutputScalars.size();
1060        case ScalarType::Output:
1061            index += mInputScalars.size();
1062        case ScalarType::Input:
1063            break;
1064    }
1065    assert (index < mKernelStateType->getStructNumElements());
1066    return b.CreateGEP(getHandle(), {b.getInt32(0), b.getInt32(index)});
1067}
1068
1069/** ------------------------------------------------------------------------------------------------------------- *
1070 * @brief initializeLocalScalarValues
1071 ** ------------------------------------------------------------------------------------------------------------- */
1072void Kernel::initializeLocalScalarValues(const std::unique_ptr<KernelBuilder> & b) {
1073    if (LLVM_LIKELY(mLocalScalars.empty())) {
1074        return;
1075    }
1076    mLocalScalarPtr.resize(mLocalScalars.size());
1077    const auto end = mScalarMap.end();
1078    for (auto i = mScalarMap.begin(); i != end; ++i) {
1079        ScalarField & field = i->getValue();
1080        if (LLVM_UNLIKELY(field.Type == ScalarType::Local)) {
1081            const auto index = field.Index;
1082            const Binding & local = mLocalScalars[index];
1083            Value * const scalar = b->CreateAlloca(local.getType());
1084            b->CreateStore(ConstantAggregateZero::get(local.getType()), scalar);
1085            mLocalScalarPtr[index] = scalar;
1086        }
1087    }
1088}
1089
1090/** ------------------------------------------------------------------------------------------------------------- *
1091 * @brief getInputScalarBinding
1092 ** ------------------------------------------------------------------------------------------------------------- */
1093Binding & Kernel::getInputScalarBinding(const StringRef name) {
1094    const ScalarField & field = getScalarField(name);
1095    if (LLVM_UNLIKELY(field.Type != ScalarType::Input)) {
1096        report_fatal_error(getName() + "." + name + "is not an input scalar");
1097    }
1098    return mInputScalars[field.Index];
1099}
1100
1101/** ------------------------------------------------------------------------------------------------------------- *
1102 * @brief getOutputScalarBinding
1103 ** ------------------------------------------------------------------------------------------------------------- */
1104Binding & Kernel::getOutputScalarBinding(const StringRef name) {
1105    const ScalarField & field = getScalarField(name);
1106    if (LLVM_UNLIKELY(field.Type != ScalarType::Output)) {
1107        report_fatal_error(getName() + "." + name + "is not an output scalar");
1108    }
1109    return mOutputScalars[field.Index];
1110}
1111
1112/** ------------------------------------------------------------------------------------------------------------- *
1113 * @brief getStreamPort
1114 ** ------------------------------------------------------------------------------------------------------------- */
1115Kernel::StreamSetPort Kernel::getStreamPort(const StringRef name) const {
1116    const auto f = mStreamSetMap.find(name);
1117    if (LLVM_UNLIKELY(f == mStreamSetMap.end())) {
1118        assert (!"could not find stream set!");
1119        report_fatal_error(getName() + " does not contain stream set " + name);
1120    }
1121    return f->second;
1122}
1123
1124/** ------------------------------------------------------------------------------------------------------------- *
1125 * @brief getBinding
1126 ** ------------------------------------------------------------------------------------------------------------- */
1127const Binding & Kernel::getStreamBinding(const StringRef name) const {
1128    Port port; unsigned index;
1129    std::tie(port, index) = getStreamPort(name);
1130    return (port == Port::Input) ? getInputStreamSetBinding(index) : getOutputStreamSetBinding(index);
1131}
1132
1133/** ------------------------------------------------------------------------------------------------------------- *
1134 * @brief getLowerBound
1135 ** ------------------------------------------------------------------------------------------------------------- */
1136RateValue Kernel::getLowerBound(const Binding & binding) const {
1137    const ProcessingRate & rate = binding.getRate();
1138    if (rate.hasReference()) {
1139        return rate.getLowerBound() * getLowerBound(getStreamBinding(rate.getReference()));
1140    } else {
1141        return rate.getLowerBound();
1142    }
1143}
1144
1145/** ------------------------------------------------------------------------------------------------------------- *
1146 * @brief getUpperBound
1147 ** ------------------------------------------------------------------------------------------------------------- */
1148RateValue Kernel::getUpperBound(const Binding & binding) const {
1149    const ProcessingRate & rate = binding.getRate();
1150    if (rate.hasReference()) {
1151        return rate.getUpperBound() * getUpperBound(getStreamBinding(rate.getReference()));
1152    } else {
1153        return rate.getUpperBound();
1154    }
1155}
1156
1157/** ------------------------------------------------------------------------------------------------------------- *
1158 * @brief isCountable
1159 ** ------------------------------------------------------------------------------------------------------------- */
1160bool Kernel::isCountable(const Binding & binding) const {
1161    const ProcessingRate & rate = binding.getRate();
1162    if (rate.isFixed() || rate.isPopCount() || rate.isNegatedPopCount()) {
1163        return true;
1164    } else if (rate.isRelative()) {
1165        return isCountable(getStreamBinding(rate.getReference()));
1166    } else {
1167        return false;
1168    }
1169}
1170
1171/** ------------------------------------------------------------------------------------------------------------- *
1172 * @brief isCalculable
1173 ** ------------------------------------------------------------------------------------------------------------- */
1174bool Kernel::isCalculable(const Binding & binding) const {
1175    const ProcessingRate & rate = binding.getRate();
1176    if (rate.isFixed() || rate.isBounded()) {
1177        return true;
1178    } else if (rate.isRelative()) {
1179        return isCalculable(getStreamBinding(rate.getReference()));
1180    } else {
1181        return false;
1182    }
1183}
1184
1185/** ------------------------------------------------------------------------------------------------------------- *
1186 * @brief requiresOverflow
1187 ** ------------------------------------------------------------------------------------------------------------- */
1188bool Kernel::requiresOverflow(const Binding & binding) const {
1189    const ProcessingRate & rate = binding.getRate();
1190    if (rate.isFixed() || binding.hasAttribute(AttrId::BlockSize)) {
1191        return false;
1192    } else if (rate.isRelative()) {
1193        return requiresOverflow(getStreamBinding(rate.getReference()));
1194    } else {
1195        return true;
1196    }
1197}
1198
1199/** ------------------------------------------------------------------------------------------------------------- *
1200 * @brief isUnknownRate
1201 ** ------------------------------------------------------------------------------------------------------------- */
1202bool Kernel::isUnknownRate(const Binding & binding) const {
1203    const ProcessingRate & rate = binding.getRate();
1204    if (rate.isUnknown()) {
1205        return true;
1206    } else if (rate.isRelative()) {
1207        return isUnknownRate(getStreamBinding(rate.getReference()));
1208    } else {
1209        return false;
1210    }
1211}
1212
1213/** ------------------------------------------------------------------------------------------------------------- *
1214 * @brief initializeBindings
1215 ** ------------------------------------------------------------------------------------------------------------- */
1216void Kernel::initializeBindings(BaseDriver & driver) {
1217
1218    for (unsigned i = 0; i < mInputScalars.size(); i++) {
1219        Binding & input = mInputScalars[i];
1220        addScalarToMap(input.getName(), ScalarType::Input, i);
1221        if (input.getRelationship() == nullptr) {
1222            input.setRelationship(driver.CreateScalar(input.getType()));
1223        }
1224    }
1225    for (unsigned i = 0; i < mInputStreamSets.size(); i++) {
1226        Binding & input = mInputStreamSets[i];
1227        if (LLVM_UNLIKELY(input.getRelationship() == nullptr)) {
1228            report_fatal_error(getName()+ "." + input.getName() + " must be set upon construction");
1229        }
1230        addStreamToMap(input.getName(), Port::Input, i);
1231    }
1232    for (unsigned i = 0; i < mOutputStreamSets.size(); i++) {
1233        Binding & output = mOutputStreamSets[i];
1234        if (LLVM_UNLIKELY(output.getRelationship() == nullptr)) {
1235            report_fatal_error(getName()+ "." + output.getName() + " must be set upon construction");
1236        }
1237        addStreamToMap(output.getName(), Port::Output, i);
1238    }
1239    for (unsigned i = 0; i < mInternalScalars.size(); i++) {
1240        const Binding & internal = mInternalScalars[i];
1241        addScalarToMap(internal.getName(), ScalarType::Internal, i);
1242    }
1243    for (unsigned i = 0; i < mOutputScalars.size(); i++) {
1244        Binding & output = mOutputScalars[i];
1245        addScalarToMap(output.getName(), ScalarType::Output, i);
1246        if (output.getRelationship() == nullptr) {
1247            output.setRelationship(driver.CreateScalar(output.getType()));
1248        }
1249    }
1250}
1251
1252/** ------------------------------------------------------------------------------------------------------------- *
1253 * @brief setInputStreamSetAt
1254 ** ------------------------------------------------------------------------------------------------------------- */
1255void Kernel::setInputStreamSetAt(const unsigned i, StreamSet * const value) {
1256    mInputStreamSets[i].setRelationship(value);
1257}
1258
1259/** ------------------------------------------------------------------------------------------------------------- *
1260 * @brief setOutputStreamSetAt
1261 ** ------------------------------------------------------------------------------------------------------------- */
1262void Kernel::setOutputStreamSetAt(const unsigned i, StreamSet * const value) {
1263    mOutputStreamSets[i].setRelationship(value);
1264}
1265
1266/** ------------------------------------------------------------------------------------------------------------- *
1267 * @brief setInputScalarAt
1268 ** ------------------------------------------------------------------------------------------------------------- */
1269void Kernel::setInputScalarAt(const unsigned i, Scalar * const value) {
1270    mInputScalars[i].setRelationship(value);
1271}
1272
1273/** ------------------------------------------------------------------------------------------------------------- *
1274 * @brief setOutputScalarAt
1275 ** ------------------------------------------------------------------------------------------------------------- */
1276void Kernel::setOutputScalarAt(const unsigned i, Scalar * const value) {
1277    mOutputScalars[i].setRelationship(value);
1278}
1279
1280/** ------------------------------------------------------------------------------------------------------------- *
1281 * @brief generateKernelMethod
1282 ** ------------------------------------------------------------------------------------------------------------- */
1283void SegmentOrientedKernel::generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
1284    generateDoSegmentMethod(b);
1285}
1286
1287/** ------------------------------------------------------------------------------------------------------------- *
1288 * @brief annotateKernelNameWithDebugFlags
1289 ** ------------------------------------------------------------------------------------------------------------- */
1290inline std::string annotateKernelNameWithDebugFlags(std::string && name) {
1291    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
1292        name += "_EA";
1293    }
1294    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
1295        name += "_MP";
1296    }
1297    name += "_O" + std::to_string((int)codegen::OptLevel);
1298    return name;
1299}
1300
1301/** ------------------------------------------------------------------------------------------------------------- *
1302 * @brief getDefaultFamilyName
1303 ** ------------------------------------------------------------------------------------------------------------- */
1304std::string Kernel::getDefaultFamilyName() const {
1305    std::string tmp;
1306    raw_string_ostream out(tmp);
1307    if (LLVM_LIKELY(isStateful())) {
1308        out << "F";
1309    } else {
1310        out << "L";
1311    }
1312    out << getStride();
1313    AttributeSet::print(out);
1314    for (const Binding & input : mInputScalars) {
1315        out << ",IV("; input.print(this, out); out << ')';
1316    }
1317    for (const Binding & input : mInputStreamSets) {
1318        out << ",IS("; input.print(this, out); out << ')';
1319    }
1320    for (const Binding & output : mOutputStreamSets) {
1321        out << ",OS("; output.print(this, out); out << ')';
1322    }
1323    for (const Binding & output : mOutputScalars) {
1324        out << ",OV("; output.print(this, out); out << ')';
1325    }
1326    out.flush();
1327    return tmp;
1328}
1329
1330// CONSTRUCTOR
1331Kernel::Kernel(const std::unique_ptr<KernelBuilder> & b,
1332               const TypeId typeId,
1333               std::string && kernelName,
1334               Bindings && stream_inputs,
1335               Bindings && stream_outputs,
1336               Bindings && scalar_inputs,
1337               Bindings && scalar_outputs,
1338               Bindings && internal_scalars)
1339: mIsGenerated(false)
1340, mHandle(nullptr)
1341, mModule(nullptr)
1342, mKernelStateType(nullptr)
1343, mInputStreamSets(std::move(stream_inputs))
1344, mOutputStreamSets(std::move(stream_outputs))
1345, mInputScalars(std::move(scalar_inputs))
1346, mOutputScalars(std::move(scalar_outputs))
1347, mInternalScalars( std::move(internal_scalars))
1348, mCurrentMethod(nullptr)
1349, mStride(b->getBitBlockWidth())
1350, mTerminationSignalPtr(nullptr)
1351, mIsFinal(nullptr)
1352, mNumOfStrides(nullptr)
1353, mKernelName(annotateKernelNameWithDebugFlags(std::move(kernelName)))
1354, mTypeId(typeId) {
1355
1356}
1357
1358Kernel::~Kernel() { }
1359
1360// CONSTRUCTOR
1361SegmentOrientedKernel::SegmentOrientedKernel(const std::unique_ptr<KernelBuilder> & b,
1362                                             std::string && kernelName,
1363                                             Bindings && stream_inputs,
1364                                             Bindings && stream_outputs,
1365                                             Bindings && scalar_parameters,
1366                                             Bindings && scalar_outputs,
1367                                             Bindings && internal_scalars)
1368: Kernel(b,
1369TypeId::SegmentOriented, std::move(kernelName),
1370std::move(stream_inputs), std::move(stream_outputs),
1371std::move(scalar_parameters), std::move(scalar_outputs),
1372std::move(internal_scalars)) {
1373
1374}
1375
1376
1377}
Note: See TracBrowser for help on using the repository browser.