source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

Last change on this file was 6297, checked in by cameron, 3 months ago

Merge branch 'master' of https://cs-git-research.cs.surrey.sfu.ca/cameron/parabix-devel

File size: 58.9 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain/toolchain.h>
8#include <toolchain/driver.h>
9#include <kernels/relationship.h>
10#include <kernels/streamset.h>
11#include <kernels/kernel_builder.h>
12#include <llvm/IR/CallingConv.h>
13#include <llvm/IR/DerivedTypes.h>
14#include <llvm/IR/Constants.h>
15#include <llvm/IR/Function.h>
16#include <llvm/IR/Instructions.h>
17#include <llvm/IR/MDBuilder.h>
18#include <llvm/IR/Module.h>
19#include <llvm/Support/raw_ostream.h>
20#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(4, 0, 0)
21#include <llvm/Bitcode/ReaderWriter.h>
22#else
23#include <llvm/Bitcode/BitcodeWriter.h>
24#endif
25#include <llvm/Transforms/Utils/Local.h>
26#include <llvm/Support/Debug.h>
27#include <boost/uuid/sha1.hpp>
28#include <llvm/Support/Format.h>
29#include <sstream>
30#include <llvm/Support/raw_ostream.h>
31
32using namespace llvm;
33using namespace boost;
34
35namespace kernel {
36
37using AttrId = Attribute::KindId;
38using RateValue = ProcessingRate::RateValue;
39using RateId = ProcessingRate::KindId;
40using StreamPort = Kernel::StreamSetPort;
41using Port = Kernel::Port;
42
43// TODO: make "namespaced" internal scalars that are automatically grouped into cache-aligned structs
44// within the kernel state to hide the complexity from the user?
45
// Suffixes appended to a kernel's name to form the names of the functions
// declared for it (initialize, do-segment and finalize respectively).
static const char * const INIT_SUFFIX = "_Init";
static const char * const DO_SEGMENT_SUFFIX = "_DoSegment";
static const char * const TERMINATE_SUFFIX = "_Terminate";
49
50/** ------------------------------------------------------------------------------------------------------------- *
51 * @brief setInstance
52 ** ------------------------------------------------------------------------------------------------------------- */
53void Kernel::setHandle(const std::unique_ptr<KernelBuilder> & b, Value * const handle) {
54    assert ("handle cannot be null!" && handle);
55    assert ("handle must be a pointer!" && handle->getType()->isPointerTy());
56    assert ("handle must be a kernel state object!" && (handle->getType()->getPointerElementType() == mKernelStateType));
57    #ifndef NDEBUG
58    const Function * const handleFunction = isa<Argument>(handle) ? cast<Argument>(handle)->getParent() : cast<Instruction>(handle)->getParent()->getParent();
59    const Function * const builderFunction = b->GetInsertBlock()->getParent();
60    assert ("handle is not from the current function." && (handleFunction == builderFunction));
61    #endif
62    mHandle = handle;
63}
64
65/** ------------------------------------------------------------------------------------------------------------- *
66 * @brief isLocalBuffer
67 ** ------------------------------------------------------------------------------------------------------------- */
68inline bool isLocalBuffer(const Binding & output) {
69    return output.getRate().isUnknown() || output.hasAttribute(AttrId::ManagedBuffer);
70}
71
/** ------------------------------------------------------------------------------------------------------------- *
 * @brief reset
 *
 * Resizes the container to exactly n elements and sets every one of them to nullptr.
 ** ------------------------------------------------------------------------------------------------------------- */
template <typename Vec>
inline void reset(Vec & vec, const unsigned n) {
    vec.resize(n);
    // After the resize the container holds exactly n entries, so clearing all of them clears the first n.
    for (auto & entry : vec) {
        entry = nullptr;
    }
}
80
81/** ------------------------------------------------------------------------------------------------------------- *
82 * @brief addBaseKernelProperties
83 *
84 * Base kernel properties are those that the pipeline requires access to and must be in a fixed memory location.
85 ** ------------------------------------------------------------------------------------------------------------- */
86void Kernel::addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & b) {
87
88    // TODO: if a stream has an Expandable or ManagedBuffer attribute or is produced at an Unknown rate,
89    // the pipeline ought to pass the stream as a DynamicBuffer. This will require some coordination between
90    // the pipeline and kernel to ensure both have a consistent view of the buffer and that if either expands,
91    // any other kernel that is (simultaneously) reading from the buffer is unaffected.
92
93    mStreamSetInputBuffers.clear();
94    const auto numOfInputStreams = mInputStreamSets.size();
95    mStreamSetInputBuffers.reserve(numOfInputStreams);
96    for (unsigned i = 0; i < numOfInputStreams; ++i) {
97        const Binding & input = mInputStreamSets[i];
98        mStreamSetInputBuffers.emplace_back(new ExternalBuffer(b, input.getType()));
99    }
100
101    mStreamSetOutputBuffers.clear();
102    const auto numOfOutputStreams = mOutputStreamSets.size();
103    mStreamSetOutputBuffers.reserve(numOfOutputStreams);
104    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
105        const Binding & output = mOutputStreamSets[i];
106        mStreamSetOutputBuffers.emplace_back(new ExternalBuffer(b, output.getType()));
107    }
108
109    // If an output is a managed buffer, store its handle.
110    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
111        const Binding & output = mOutputStreamSets[i];
112        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
113            Type * const handleTy = mStreamSetOutputBuffers[i]->getHandleType(b);
114            addInternalScalar(handleTy, output.getName() + BUFFER_HANDLE_SUFFIX);
115        }
116    }
117
118}
119
120/** ------------------------------------------------------------------------------------------------------------- *
121 * @brief addScalarToMap
122 ** ------------------------------------------------------------------------------------------------------------- */
123void Kernel::addScalarToMap(const StringRef name, const ScalarType scalarType, const unsigned index) {
124    const auto r = mScalarMap.insert(std::make_pair(name, ScalarField{scalarType, index}));
125    if (LLVM_UNLIKELY(!r.second)) {
126        const ScalarField & sf = r.first->second;
127        if (LLVM_UNLIKELY(sf.Type != scalarType || sf.Index != index)) {
128            report_fatal_error(getName() + " already contains scalar " + name);
129        }
130    }
131}
132
133/** ------------------------------------------------------------------------------------------------------------- *
134 * @brief addScalarToMap
135 ** ------------------------------------------------------------------------------------------------------------- */
136void Kernel::addStreamToMap(const StringRef name, const Port port, const unsigned index) {
137    const auto r = mStreamSetMap.insert(std::make_pair(name, std::make_pair(port, index)));
138    if (LLVM_UNLIKELY(!r.second)) {
139        const StreamPort & sf = r.first->second;
140        if (LLVM_UNLIKELY(sf.first != port || sf.second != index)) {
141            report_fatal_error(getName() + " already contains stream " + name);
142        }
143    }
144}
145
146/** ------------------------------------------------------------------------------------------------------------- *
147 * @brief addKernelDeclarations
148 ** ------------------------------------------------------------------------------------------------------------- */
149void Kernel::addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b) {
150    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
151        llvm_unreachable("Kernel state must be constructed prior to calling addKernelDeclarations");
152    }
153    addInitializeDeclaration(b);
154    addDoSegmentDeclaration(b);
155    addFinalizeDeclaration(b);
156    linkExternalMethods(b);
157}
158
159/** ------------------------------------------------------------------------------------------------------------- *
160 * @brief generateKernel
161 ** ------------------------------------------------------------------------------------------------------------- */
162void Kernel::generateKernel(const std::unique_ptr<KernelBuilder> & b) {
163    if (LLVM_UNLIKELY(mIsGenerated)) return;
164    b->setKernel(this);
165    b->setModule(mModule);
166    addKernelDeclarations(b);
167    callGenerateInitializeMethod(b);
168    callGenerateDoSegmentMethod(b);
169    callGenerateFinalizeMethod(b);
170    addAdditionalFunctions(b);
171    mIsGenerated = true;
172}
173
174/** ------------------------------------------------------------------------------------------------------------- *
175 * @brief addInitializeDeclaration
176 ** ------------------------------------------------------------------------------------------------------------- */
177inline void Kernel::addInitializeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
178
179    std::vector<Type *> params;
180    if (LLVM_LIKELY(isStateful())) {
181        params.push_back(mKernelStateType->getPointerTo());
182    }
183    for (const Binding & binding : mInputScalars) {
184        params.push_back(binding.getType());
185    }
186
187    FunctionType * const initType = FunctionType::get(b->getInt1Ty(), params, false);
188    Function * const initFunc = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, b->getModule());
189    initFunc->setCallingConv(CallingConv::C);
190    initFunc->setDoesNotThrow();
191    auto args = initFunc->arg_begin();
192    if (LLVM_LIKELY(isStateful())) {
193        (args++)->setName("handle");
194    }
195    for (const Binding & binding : mInputScalars) {
196        (args++)->setName(binding.getName());
197    }
198
199    assert (args == initFunc->arg_end());
200}
201
202/** ------------------------------------------------------------------------------------------------------------- *
203 * @brief callGenerateInitializeMethod
204 ** ------------------------------------------------------------------------------------------------------------- */
205inline void Kernel::callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
206    const Kernel * const storedKernel = b->getKernel();
207    b->setKernel(this);
208    Value * const storedHandle = getHandle();
209    mCurrentMethod = getInitFunction(b->getModule());
210    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
211    auto args = mCurrentMethod->arg_begin();
212    if (LLVM_LIKELY(isStateful())) {
213        setHandle(b, &*(args++));
214    }
215    if (LLVM_LIKELY(isStateful())) {
216        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
217            b->CreateMProtect(mHandle, CBuilder::Protect::WRITE);
218        }
219        b->CreateStore(ConstantAggregateZero::get(mKernelStateType), mHandle);
220    }
221    for (const auto & binding : mInputScalars) {
222        b->setScalarField(binding.getName(), &*(args++));
223    }
224    const auto numOfOutputs = mOutputStreamSets.size();
225    for (unsigned i = 0; i < numOfOutputs; i++) {
226        const Binding & output = mOutputStreamSets[i];
227        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
228            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
229            mStreamSetOutputBuffers[i]->setHandle(b, handle);
230        }
231    }
232    // any kernel can set termination on initialization
233    mTerminationSignalPtr = b->CreateAlloca(b->getInt1Ty(), nullptr, "terminationSignal");
234    b->CreateStore(b->getFalse(), mTerminationSignalPtr);
235    initializeLocalScalarValues(b);
236    generateInitializeMethod(b);
237    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect) && isStateful())) {
238        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
239    }
240    b->CreateRet(b->CreateLoad(mTerminationSignalPtr));
241    mTerminationSignalPtr = nullptr;
242
243    b->setKernel(storedKernel);
244    mHandle = storedHandle;
245    mCurrentMethod = nullptr;
246}
247
248/** ------------------------------------------------------------------------------------------------------------- *
249 * @brief hasParam
250 ** ------------------------------------------------------------------------------------------------------------- */
251inline bool hasParam(const Binding & binding) {
252    return !binding.getRate().isRelative();
253}
254
255/** ------------------------------------------------------------------------------------------------------------- *
256 * @brief addDoSegmentDeclaration
257 ** ------------------------------------------------------------------------------------------------------------- */
258inline void Kernel::addDoSegmentDeclaration(const std::unique_ptr<KernelBuilder> & b) {
259
260    Type * const retTy = canSetTerminateSignal() ? b->getInt1Ty() : b->getVoidTy();
261    FunctionType * const doSegmentType = FunctionType::get(retTy, getDoSegmentFields(b), false);
262    Function * const doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, b->getModule());
263    doSegment->setCallingConv(CallingConv::C);
264    doSegment->setDoesNotThrow();
265    auto args = doSegment->arg_begin();
266    if (LLVM_LIKELY(isStateful())) {
267        (args++)->setName("handle");
268    }
269    (args++)->setName("numOfStrides");
270    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
271        const Binding & input = mInputStreamSets[i];
272        (args++)->setName(input.getName());
273        if (LLVM_LIKELY(hasParam(input))) {
274            (args++)->setName(input.getName() + "_processed");
275        }
276        (args++)->setName(input.getName() + "_accessible");
277        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
278            (args++)->setName(input.getName() + "_popCountArray");
279        }
280        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
281            (args++)->setName(input.getName() + "_negatedPopCountArray");
282        }
283    }
284    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
285        const Binding & output = mOutputStreamSets[i];
286        if (LLVM_LIKELY(!isLocalBuffer(output))) {
287            (args++)->setName(output.getName());
288        }
289        if (LLVM_LIKELY(hasParam(output))) {
290            (args++)->setName(output.getName() + "_produced");
291        }
292        if (LLVM_LIKELY(isLocalBuffer(output))) {
293            (args++)->setName(output.getName() + "_consumed");
294        } else {
295            (args++)->setName(output.getName() + "_writable");
296        }
297    }
298    assert (args == doSegment->arg_end());
299}
300
301/** ------------------------------------------------------------------------------------------------------------- *
302 * @brief getDoSegmentFields
303 ** ------------------------------------------------------------------------------------------------------------- */
304std::vector<Type *> Kernel::getDoSegmentFields(const std::unique_ptr<KernelBuilder> & b) const {
305
306    IntegerType * const sizeTy = b->getSizeTy();
307    PointerType * const sizePtrTy = sizeTy->getPointerTo();
308
309    std::vector<Type *> fields;
310    fields.reserve(2 + mInputStreamSets.size() + mOutputStreamSets.size());
311    if (LLVM_LIKELY(isStateful())) {
312        fields.push_back(mKernelStateType->getPointerTo());  // handle
313    }
314    fields.push_back(sizeTy); // numOfStrides
315    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
316        Type * const bufferType = mStreamSetInputBuffers[i]->getType();
317        // logical base input address
318        fields.push_back(bufferType->getPointerTo());
319        // processed input items
320        const Binding & input = mInputStreamSets[i];
321        if (isAddressable(input)) {
322            fields.push_back(sizePtrTy); // updatable
323        }  else if (isCountable(input)) {
324            fields.push_back(sizeTy);  // constant
325        }
326        // accessible input items (after non-deferred processed item count)
327        fields.push_back(sizeTy);
328        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
329            fields.push_back(sizePtrTy);
330        }
331        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
332            fields.push_back(sizePtrTy);
333        }
334    }
335
336    const auto canTerminate = canSetTerminateSignal();
337
338    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
339        const Binding & output = mOutputStreamSets[i];
340        // logical base output address
341        if (LLVM_LIKELY(!isLocalBuffer(output))) {
342            Type * const bufferType = mStreamSetOutputBuffers[i]->getType();
343            fields.push_back(bufferType->getPointerTo());
344        }
345        // produced output items
346        if (canTerminate || isAddressable(output)) {
347            fields.push_back(sizePtrTy); // updatable
348        } else if (isCountable(output)) {
349            fields.push_back(sizeTy); // constant
350        }
351        // If this is a local buffer, the next param is its consumed item count;
352        // otherwise it'll hold its writable output items.
353        fields.push_back(sizeTy);
354    }
355
356    return fields;
357}
358
359/** ------------------------------------------------------------------------------------------------------------- *
360 * @brief callGenerateDoSegmentMethod
361 ** ------------------------------------------------------------------------------------------------------------- */
362inline void Kernel::callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) {
363
364    assert (mInputStreamSets.size() == mStreamSetInputBuffers.size());
365    assert (mOutputStreamSets.size() == mStreamSetOutputBuffers.size());
366
367    const Kernel * const storedKernel = b->getKernel();
368    b->setKernel(this);
369    Value * const storedHandle = getHandle();
370    mCurrentMethod = getDoSegmentFunction(b->getModule());
371    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
372
373    std::vector<Value *> args;
374    args.reserve(mCurrentMethod->arg_size());
375    for (auto ArgI = mCurrentMethod->arg_begin(); ArgI != mCurrentMethod->arg_end(); ++ArgI) {
376        args.push_back(&(*ArgI));
377    }
378    setDoSegmentProperties(b, args);
379
380    generateKernelMethod(b);
381
382    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
383        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
384    }
385
386    const auto numOfInputs = getNumOfStreamInputs();
387
388    for (unsigned i = 0; i < numOfInputs; i++) {
389        if (mUpdatableProcessedInputItemPtr[i]) {
390            Value * const items = b->CreateLoad(mProcessedInputItemPtr[i]);
391            b->CreateStore(items, mUpdatableProcessedInputItemPtr[i]);
392        }
393    }
394
395    const auto numOfOutputs = getNumOfStreamOutputs();
396
397    for (unsigned i = 0; i < numOfOutputs; i++) {
398        if (mUpdatableProducedOutputItemPtr[i]) {
399            Value * const items = b->CreateLoad(mProducedOutputItemPtr[i]);
400            b->CreateStore(items, mUpdatableProducedOutputItemPtr[i]);
401        }
402    }
403
404    // return the termination signal (if one exists)
405    if (mTerminationSignalPtr) {
406        b->CreateRet(b->CreateLoad(mTerminationSignalPtr));
407        mTerminationSignalPtr = nullptr;
408    } else {
409        b->CreateRetVoid();
410    }
411
412    // Clean up all of the constructed buffers.
413    b->setKernel(storedKernel);
414    mHandle = storedHandle;
415    mCurrentMethod = nullptr;
416    mIsFinal = nullptr;
417    mNumOfStrides = nullptr;
418}
419
420/** ------------------------------------------------------------------------------------------------------------- *
421 * @brief setDoSegmentProperties
422 ** ------------------------------------------------------------------------------------------------------------- */
423void Kernel::setDoSegmentProperties(const std::unique_ptr<KernelBuilder> & b, const std::vector<Value *> & args) {
424
425    initializeLocalScalarValues(b);
426
427    auto arg = args.begin();
428    if (LLVM_LIKELY(isStateful())) {
429        setHandle(b, *arg++);
430    }
431
432    mNumOfStrides = *arg++;
433    mIsFinal = b->CreateIsNull(mNumOfStrides);
434    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
435        b->CreateMProtect(mHandle, CBuilder::Protect::WRITE);
436    }
437
438    // NOTE: the disadvantage of passing the stream pointers as a parameter is that it becomes more difficult
439    // to access a stream set from a LLVM function call. We could create a stream-set aware function creation
440    // and call system here but that is not an ideal way of handling this.
441
442    const auto numOfInputs = getNumOfStreamInputs();
443
444    reset(mProcessedInputItemPtr, numOfInputs);
445    reset(mAccessibleInputItems, numOfInputs);
446    reset(mAvailableInputItems, numOfInputs);
447    reset(mPopCountRateArray, numOfInputs);
448    reset(mNegatedPopCountRateArray, numOfInputs);
449    reset(mUpdatableProcessedInputItemPtr, numOfInputs);
450
451    IntegerType * const sizeTy = b->getSizeTy();
452
453    for (unsigned i = 0; i < numOfInputs; i++) {
454        /// ----------------------------------------------------
455        /// logical buffer base address
456        /// ----------------------------------------------------
457        const Binding & input = mInputStreamSets[i];
458        assert (arg != args.end());
459        Value * const addr = *arg++;
460        auto & buffer = mStreamSetInputBuffers[i];
461        Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
462        buffer->setHandle(b, localHandle);
463        buffer->setBaseAddress(b.get(), addr);
464        /// ----------------------------------------------------
465        /// processed item count
466        /// ----------------------------------------------------
467
468        // NOTE: we create a redundant alloca to store the input param so that
469        // Mem2Reg can convert it into a PHINode if the item count is updated in
470        // a loop; otherwise, it will be discarded in favor of the param itself.
471
472        Value * processed = nullptr;
473        if (isAddressable(input)) {
474            assert (arg != args.end());
475            mUpdatableProcessedInputItemPtr[i] = *arg++;
476            processed = b->CreateLoad(mUpdatableProcessedInputItemPtr[i]);
477        } else if (LLVM_LIKELY(isCountable(input))) {
478            assert (arg != args.end());
479            processed = *arg++;
480        } else { // isRelative
481            const ProcessingRate & rate = input.getRate();
482            Port port; unsigned index;
483            std::tie(port, index) = getStreamPort(rate.getReference());
484            assert (port == Port::Input && index < i);
485            assert (mProcessedInputItemPtr[index]);
486            Value * const ref = b->CreateLoad(mProcessedInputItemPtr[index]);
487            processed = b->CreateMul2(ref, rate.getRate());
488        }
489        AllocaInst * const processedItems = b->CreateAlloca(sizeTy);
490        b->CreateStore(processed, processedItems);
491        mProcessedInputItemPtr[i] = processedItems;
492        /// ----------------------------------------------------
493        /// accessible item count
494        /// ----------------------------------------------------
495        assert (arg != args.end());
496        Value * const accessible = *arg++;
497        mAccessibleInputItems[i] = accessible;
498        Value * capacity = b->CreateAdd(processed, accessible);
499        mAvailableInputItems[i] = capacity;
500        if (input.hasLookahead()) {
501            capacity = b->CreateAdd(capacity, b->getSize(input.getLookahead()));
502        }
503        buffer->setCapacity(b.get(), capacity);
504
505        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
506            assert (arg != args.end());
507            mPopCountRateArray[i] = *arg++;
508        }
509
510        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
511            assert (arg != args.end());
512            mNegatedPopCountRateArray[i] = *arg++;
513        }
514    }
515
516    // set all of the output buffers
517    const auto numOfOutputs = getNumOfStreamOutputs();
518    reset(mProducedOutputItemPtr, numOfOutputs);
519    reset(mWritableOutputItems, numOfOutputs);
520    reset(mConsumedOutputItems, numOfOutputs);
521    reset(mUpdatableProducedOutputItemPtr, numOfOutputs);
522
523    const auto canTerminate = canSetTerminateSignal();
524
525    for (unsigned i = 0; i < numOfOutputs; i++) {
526        /// ----------------------------------------------------
527        /// logical buffer base address
528        /// ----------------------------------------------------
529
530        auto & buffer = mStreamSetOutputBuffers[i];
531        const Binding & output = mOutputStreamSets[i];
532        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
533            // If an output is a managed buffer, the address is stored within the state instead
534            // of being passed in through the function call.
535            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
536            buffer->setHandle(b, handle);
537        } else {
538            assert (arg != args.end());
539            Value * const logicalBaseAddress = *arg++;
540            Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
541            buffer->setHandle(b, localHandle);
542            buffer->setBaseAddress(b.get(), logicalBaseAddress);
543        }
544        /// ----------------------------------------------------
545        /// produced item count
546        /// ----------------------------------------------------
547        Value * produced = nullptr;
548        if (LLVM_LIKELY(canTerminate || isAddressable(output))) {
549            assert (arg != args.end());
550            mUpdatableProducedOutputItemPtr[i] = *arg++;
551            produced = b->CreateLoad(mUpdatableProducedOutputItemPtr[i]);
552        } else if (LLVM_LIKELY(isCountable(output))) {
553            assert (arg != args.end());
554            produced = *arg++;
555        } else { // isRelative
556
557            // For now, if something is produced at a relative rate to another stream in a kernel that
558            // may terminate, its final item count is inherited from its reference stream and cannot
559            // be set independently. Should they be independent at early termination?
560
561            const ProcessingRate & rate = output.getRate();
562            Port port; unsigned index;
563            std::tie(port, index) = getStreamPort(rate.getReference());
564            assert (port == Port::Input || (port == Port::Output && index < i));
565            const auto & items = (port == Port::Input) ? mProcessedInputItemPtr : mProducedOutputItemPtr;
566            Value * const ref = b->CreateLoad(items[index]);
567            produced = b->CreateMul2(ref, rate.getRate());
568        }
569        AllocaInst * const producedItems = b->CreateAlloca(sizeTy);
570        b->CreateStore(produced, producedItems);
571        mProducedOutputItemPtr[i] = producedItems;
572        /// ----------------------------------------------------
573        /// consumed or writable item count
574        /// ----------------------------------------------------
575        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
576            Value * const consumed = *arg++;
577            mConsumedOutputItems[i] = consumed;
578        } else {
579            Value * writable = *arg++;
580            mWritableOutputItems[i] = writable;
581            Value * const capacity = b->CreateAdd(produced, writable);
582            buffer->setCapacity(b.get(), capacity);
583        }
584    }
585    assert (arg == args.end());
586
587    // initialize the termination signal if this kernel can set it
588    mTerminationSignalPtr = nullptr;
589    if (canTerminate) {
590        mTerminationSignalPtr = b->CreateAlloca(b->getInt1Ty(), nullptr, "terminationSignal");
591        b->CreateStore(b->getFalse(), mTerminationSignalPtr);
592    }
593
594}
595
596/** ------------------------------------------------------------------------------------------------------------- *
597 * @brief getDoSegmentProperties
598 *
599 * Reverse of the setDoSegmentProperties operation; used by the PipelineKernel when constructing internal threads
600 * to simplify passing of the state data.
601 ** ------------------------------------------------------------------------------------------------------------- */
602std::vector<Value *> Kernel::getDoSegmentProperties(const std::unique_ptr<KernelBuilder> & b) const {
603
604    std::vector<Value *> props;
605    if (LLVM_LIKELY(isStateful())) {
606        props.push_back(mHandle);
607    }
608    props.push_back(mNumOfStrides);
609
610    const auto numOfInputs = getNumOfStreamInputs();
611    for (unsigned i = 0; i < numOfInputs; i++) {
612        /// ----------------------------------------------------
613        /// logical buffer base address
614        /// ----------------------------------------------------
615        const auto & buffer = mStreamSetInputBuffers[i];
616        props.push_back(buffer->getBaseAddress(b.get()));
617        /// ----------------------------------------------------
618        /// processed item count
619        /// ----------------------------------------------------
620        const Binding & input = mInputStreamSets[i];
621        if (isAddressable(input)) {
622            props.push_back(mProcessedInputItemPtr[i]);
623        } else if (LLVM_LIKELY(isCountable(input))) {
624            props.push_back(b->CreateLoad(mProcessedInputItemPtr[i]));
625        }
626        /// ----------------------------------------------------
627        /// accessible item count
628        /// ----------------------------------------------------
629        props.push_back(mAccessibleInputItems[i]);
630        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
631            props.push_back(mPopCountRateArray[i]);
632        }
633        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
634            props.push_back(mNegatedPopCountRateArray[i]);
635        }
636    }
637
638    // set all of the output buffers
639    const auto numOfOutputs = getNumOfStreamOutputs();
640    const auto canTerminate = canSetTerminateSignal();
641
642    for (unsigned i = 0; i < numOfOutputs; i++) {
643        /// ----------------------------------------------------
644        /// logical buffer base address
645        /// ----------------------------------------------------
646        const auto & buffer = mStreamSetOutputBuffers[i];
647        const Binding & output = mOutputStreamSets[i];
648        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
649            // If an output is a managed buffer, the address is stored within the state instead
650            // of being passed in through the function call.
651            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
652            props.push_back(handle);
653        } else {
654            props.push_back(buffer->getBaseAddress(b.get()));
655        }
656        /// ----------------------------------------------------
657        /// produced item count
658        /// ----------------------------------------------------
659        if (LLVM_LIKELY(canTerminate || isAddressable(output))) {
660            props.push_back(mProducedOutputItemPtr[i]);
661        } else if (LLVM_LIKELY(isCountable(output))) {
662            props.push_back(b->CreateLoad(mProducedOutputItemPtr[i]));
663        }
664        /// ----------------------------------------------------
665        /// consumed or writable item count
666        /// ----------------------------------------------------
667        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
668            props.push_back(mConsumedOutputItems[i]);
669        } else {
670            props.push_back(mWritableOutputItems[i]);
671        }
672    }
673
674    return props;
675}
676
677/** ------------------------------------------------------------------------------------------------------------- *
678 * @brief addFinalizeDeclaration
679 ** ------------------------------------------------------------------------------------------------------------- */
680inline void Kernel::addFinalizeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
681    Type * resultType = nullptr;
682    if (mOutputScalars.empty()) {
683        resultType = b->getVoidTy();
684    } else {
685        const auto n = mOutputScalars.size();
686        Type * outputType[n];
687        for (unsigned i = 0; i < n; ++i) {
688            outputType[i] = mOutputScalars[i].getType();
689        }
690        if (n == 1) {
691            resultType = outputType[0];
692        } else {
693            resultType = StructType::get(b->getContext(), ArrayRef<Type *>(outputType, n));
694        }
695    }
696    std::vector<Type *> params;
697    if (LLVM_LIKELY(isStateful())) {
698        params.push_back(mKernelStateType->getPointerTo());
699    }
700    FunctionType * const terminateType = FunctionType::get(resultType, params, false);
701    Function * const terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, b->getModule());
702    terminateFunc->setCallingConv(CallingConv::C);
703    terminateFunc->setDoesNotThrow();
704    auto args = terminateFunc->arg_begin();
705    if (LLVM_LIKELY(isStateful())) {
706        (args++)->setName("handle");
707    }
708    assert (args == terminateFunc->arg_end());
709}
710
711/** ------------------------------------------------------------------------------------------------------------- *
712 * @brief callGenerateFinalizeMethod
713 ** ------------------------------------------------------------------------------------------------------------- */
714inline void Kernel::callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
715
716    const Kernel * const storedKernel = b->getKernel();
717    b->setKernel(this);
718    mCurrentMethod = getTerminateFunction(b->getModule());
719    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
720    if (LLVM_LIKELY(isStateful())) {
721        auto args = mCurrentMethod->arg_begin();
722        setHandle(b, &*(args++));
723        assert (args == mCurrentMethod->arg_end());
724    }
725    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
726        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
727    }
728    const auto numOfOutputs = mOutputStreamSets.size();
729    for (unsigned i = 0; i < numOfOutputs; i++) {
730        const Binding & output = mOutputStreamSets[i];
731        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
732            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
733            mStreamSetOutputBuffers[i]->setHandle(b, handle);
734        }
735    }
736    initializeLocalScalarValues(b);
737    generateFinalizeMethod(b); // may be overridden by the Kernel subtype
738    const auto outputs = getFinalOutputScalars(b);
739    if (LLVM_LIKELY(isStateful())) {
740        b->CreateFree(mHandle);
741    }
742    mHandle = nullptr;
743    if (outputs.empty()) {
744        b->CreateRetVoid();
745    } else {
746        const auto n = outputs.size();
747        if (n == 1) {
748            b->CreateRet(outputs[0]);
749        } else {
750            b->CreateAggregateRet(outputs.data(), n);
751        }
752    }
753
754    b->setKernel(storedKernel);
755    mCurrentMethod = nullptr;
756}
757
758/** ------------------------------------------------------------------------------------------------------------- *
759 * @brief getFinalOutputScalars
760 ** ------------------------------------------------------------------------------------------------------------- */
761std::vector<Value *> Kernel::getFinalOutputScalars(const std::unique_ptr<KernelBuilder> & b) {
762    const auto n = mOutputScalars.size();
763    std::vector<Value *> outputs(n);
764    for (unsigned i = 0; i < n; ++i) {
765        outputs[i] = b->getScalarField(mOutputScalars[i].getName());
766    }
767    return outputs;
768}
769
770/** ------------------------------------------------------------------------------------------------------------- *
771 * @brief getCacheName
772 ** ------------------------------------------------------------------------------------------------------------- */
773std::string Kernel::getCacheName(const std::unique_ptr<KernelBuilder> & b) const {
774    std::stringstream cacheName;
775    cacheName << getName() << '_' << b->getBuilderUniqueName();
776    return cacheName.str();
777}
778
779/** ------------------------------------------------------------------------------------------------------------- *
780 * @brief setModule
781 ** ------------------------------------------------------------------------------------------------------------- */
782Module * Kernel::setModule(Module * const module) {
783    assert (mModule == nullptr || mModule == module);
784    assert (module != nullptr);
785    mModule = module;
786    return mModule;
787}
788
789/** ------------------------------------------------------------------------------------------------------------- *
790 * @brief makeModule
791 ** ------------------------------------------------------------------------------------------------------------- */
792Module * Kernel::makeModule(const std::unique_ptr<KernelBuilder> & b) {
793    Module * m = new Module(getCacheName(b), b->getContext());
794    m->setTargetTriple(b->getModule()->getTargetTriple());
795    m->setDataLayout(b->getModule()->getDataLayout());
796    return setModule(m);
797}
798
799
800/** ------------------------------------------------------------------------------------------------------------- *
801 * @brief getInitFunction
802 ** ------------------------------------------------------------------------------------------------------------- */
803Function * Kernel::getInitFunction(Module * const module) const {
804    const auto name = getName() + INIT_SUFFIX;
805    Function * f = module->getFunction(name);
806    if (LLVM_UNLIKELY(f == nullptr)) {
807        llvm_unreachable("cannot find Initialize function");
808    }
809    return f;
810}
811
812/** ------------------------------------------------------------------------------------------------------------- *
813 * @brief getDoSegmentFunction
814 ** ------------------------------------------------------------------------------------------------------------- */
815Function * Kernel::getDoSegmentFunction(Module * const module) const {
816    const auto name = getName() + DO_SEGMENT_SUFFIX;
817    Function * f = module->getFunction(name);
818    if (LLVM_UNLIKELY(f == nullptr)) {
819        llvm_unreachable("cannot find DoSegment function");
820    }
821    return f;
822}
823
824/** ------------------------------------------------------------------------------------------------------------- *
825 * @brief getTerminateFunction
826 ** ------------------------------------------------------------------------------------------------------------- */
827Function * Kernel::getTerminateFunction(Module * const module) const {
828    const auto name = getName() + TERMINATE_SUFFIX;
829    Function * f = module->getFunction(name);
830    if (LLVM_UNLIKELY(f == nullptr)) {
831        llvm_unreachable("cannot find Terminate function");
832    }
833    return f;
834}
835
836/** ------------------------------------------------------------------------------------------------------------- *
837 * @brief isStateful
838 ** ------------------------------------------------------------------------------------------------------------- */
839LLVM_READNONE bool Kernel::isStateful() const {
840    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
841        llvm_unreachable("kernel state must be constructed prior to calling isStateful");
842    }
843    return !mKernelStateType->isEmptyTy();
844}
845
846
847/** ------------------------------------------------------------------------------------------------------------- *
848 * @brief prepareKernel
849 ** ------------------------------------------------------------------------------------------------------------- */
850void Kernel::prepareKernel(const std::unique_ptr<KernelBuilder> & b) {
851    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
852        llvm_unreachable("Cannot call prepareKernel after constructing kernel state type");
853    }
854    if (LLVM_UNLIKELY(mStride == 0)) {
855        report_fatal_error(getName() + ": stride cannot be 0");
856    }
857    addBaseKernelProperties(b);
858    addInternalKernelProperties(b);
859    // NOTE: StructType::create always creates a new type even if an identical one exists.
860    if (LLVM_UNLIKELY(mModule == nullptr)) {
861        makeModule(b);
862    }
863    mKernelStateType = mModule->getTypeByName(getName());
864    if (LLVM_LIKELY(mKernelStateType == nullptr)) {
865        std::vector<Type *> fields;
866        fields.reserve(mInputScalars.size() + mOutputScalars.size() + mInternalScalars.size());
867        for (const Binding & scalar : mInputScalars) {
868            assert (scalar.getType());
869            fields.push_back(scalar.getType());
870        }
871        for (const Binding & scalar : mOutputScalars) {
872            assert (scalar.getType());
873            fields.push_back(scalar.getType());
874        }
875        for (const Binding & scalar : mInternalScalars) {
876            assert (scalar.getType());
877            fields.push_back(scalar.getType());
878        }
879        mKernelStateType = StructType::create(b->getContext(), fields, getName());
880    }
881    assert (isa<StructType>(mKernelStateType));
882}
883
884/** ------------------------------------------------------------------------------------------------------------- *
885 * @brief addInternalScalar
886 ** ------------------------------------------------------------------------------------------------------------- */
887void Kernel::addInternalScalar(Type * type, const StringRef name) {
888    const auto index = mInternalScalars.size();
889    mInternalScalars.emplace_back(type, name);
890    addScalarToMap(name, ScalarType::Internal, index);
891}
892
893/** ------------------------------------------------------------------------------------------------------------- *
894 * @brief addLocalScalar
895 ** ------------------------------------------------------------------------------------------------------------- */
896void Kernel::addLocalScalar(Type * type, const StringRef name) {
897    const auto index = mLocalScalars.size();
898    mLocalScalars.emplace_back(type, name);
899    addScalarToMap(name, ScalarType::Local, index);
900}
901
902/** ------------------------------------------------------------------------------------------------------------- *
903 * @brief prepareCachedKernel
904 ** ------------------------------------------------------------------------------------------------------------- */
905void Kernel::prepareCachedKernel(const std::unique_ptr<KernelBuilder> & b) {
906    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
907        llvm_unreachable("Cannot call prepareCachedKernel after constructing kernel state type");
908    }
909    addBaseKernelProperties(b);
910    mKernelStateType = getModule()->getTypeByName(getName());
911    // If we have a stateless object, the type would be optimized out of the
912    // cached IR. Consequently, we create a dummy "empty struct" to simplify
913    // the assumptions of the other Kernel functions.
914    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
915        mKernelStateType = StructType::get(b->getContext());
916    }
917    assert (isa<StructType>(mKernelStateType));
918}
919
920/** ------------------------------------------------------------------------------------------------------------- *
921 * @brief makeSignature
922 *
923 * Default kernel signature: generate the IR and emit as byte code.
924 ** ------------------------------------------------------------------------------------------------------------- */
925std::string Kernel::makeSignature(const std::unique_ptr<KernelBuilder> & b) {
926    if (LLVM_UNLIKELY(hasSignature())) {
927        generateKernel(b);
928        std::string tmp;
929        raw_string_ostream signature(tmp);
930        WriteBitcodeToFile(getModule(), signature);
931        return signature.str();
932    } else {
933        return getModule()->getModuleIdentifier();
934    }
935}
936
937/** ------------------------------------------------------------------------------------------------------------- *
938 * @brief getStringHash
939 *
940 * Create a fixed length string hash of the given str
941 ** ------------------------------------------------------------------------------------------------------------- */
942std::string Kernel::getStringHash(const StringRef str) {
943
944    uint32_t digest[5]; // 160 bits in total
945    boost::uuids::detail::sha1 sha1;
946    sha1.process_bytes(str.data(), str.size());
947    sha1.get_digest(digest);
948
949    std::string buffer;
950    buffer.reserve((5 * 8) + 1);
951    raw_string_ostream out(buffer);
952    for (unsigned i = 0; i < 5; ++i) {
953        out << format_hex_no_prefix(digest[i], 8);
954    }
955    out.flush();
956
957    return buffer;
958}
959
960/** ------------------------------------------------------------------------------------------------------------- *
961 * @brief createInstance
962 ** ------------------------------------------------------------------------------------------------------------- */
963Value * Kernel::createInstance(const std::unique_ptr<KernelBuilder> & b) const {
964    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
965        llvm_unreachable("Kernel state must be constructed prior to calling createInstance");
966    }
967    if (LLVM_LIKELY(isStateful())) {
968        Constant * const size = ConstantExpr::getSizeOf(mKernelStateType);
969        Value * handle = nullptr;
970        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
971            handle = b->CreateAlignedMalloc(size, b->getPageSize());
972            b->CreateMProtect(handle, size, CBuilder::Protect::READ);
973        } else {
974            handle = b->CreateAlignedMalloc(size, b->getCacheAlignment());
975        }
976        return b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
977    }
978    llvm_unreachable("createInstance should not be called on stateless kernels");
979    return nullptr;
980}
981
982/** ------------------------------------------------------------------------------------------------------------- *
983 * @brief initializeInstance
984 ** ------------------------------------------------------------------------------------------------------------- */
985void Kernel::initializeInstance(const std::unique_ptr<KernelBuilder> & b, std::vector<Value *> &args) {
986    assert (args.size() == getNumOfScalarInputs() + 1);
987    assert (args[0] && "cannot initialize before creation");
988    assert (args[0]->getType()->getPointerElementType() == mKernelStateType);
989    b->setKernel(this);
990    Function * const init = getInitFunction(b->getModule());
991    b->CreateCall(init, args);
992}
993
994/** ------------------------------------------------------------------------------------------------------------- *
995 * @brief finalizeInstance
996 ** ------------------------------------------------------------------------------------------------------------- */
997Value * Kernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & b, Value * const handle) const {
998    Value * result = nullptr;
999    Function * const termFunc = getTerminateFunction(b->getModule());
1000    if (LLVM_LIKELY(isStateful())) {
1001        result = b->CreateCall(termFunc, { handle });
1002    } else {
1003        result = b->CreateCall(termFunc);
1004    }
1005    if (mOutputScalars.empty()) {
1006        assert (!result || result->getType()->isVoidTy());
1007        result = nullptr;
1008    }
1009    return result;
1010
1011}
1012
1013/** ------------------------------------------------------------------------------------------------------------- *
1014 * @brief getScalarField
1015 ** ------------------------------------------------------------------------------------------------------------- */
1016const Kernel::ScalarField & Kernel::getScalarField(const StringRef name) const {
1017    assert (!mScalarMap.empty());
1018    const auto f = mScalarMap.find(name);
1019    if (LLVM_UNLIKELY(f == mScalarMap.end())) {
1020        assert (!"could not find scalar!");
1021        report_fatal_error(getName() + " does not contain scalar: " + name);
1022    }
1023    return f->second;
1024}
1025
1026/** ------------------------------------------------------------------------------------------------------------- *
1027 * @brief getScalarFieldPtr
1028 ** ------------------------------------------------------------------------------------------------------------- */
1029Value * Kernel::getScalarFieldPtr(KernelBuilder & b, const StringRef name) const {
1030    const auto & field = getScalarField(name);
1031    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
1032        llvm_unreachable("Kernel state must be constructed prior to calling getScalarFieldPtr");
1033    }
1034    unsigned index = field.Index;
1035    switch (field.Type) {
1036        case ScalarType::Local:
1037            return mLocalScalarPtr[index];
1038        case ScalarType::Internal:
1039            index += mOutputScalars.size();
1040        case ScalarType::Output:
1041            index += mInputScalars.size();
1042        case ScalarType::Input:
1043            break;
1044    }
1045    assert (index < mKernelStateType->getStructNumElements());
1046    return b.CreateGEP(getHandle(), {b.getInt32(0), b.getInt32(index)});
1047}
1048
1049/** ------------------------------------------------------------------------------------------------------------- *
1050 * @brief initializeLocalScalarValues
1051 ** ------------------------------------------------------------------------------------------------------------- */
1052void Kernel::initializeLocalScalarValues(const std::unique_ptr<KernelBuilder> & b) {
1053    if (LLVM_LIKELY(mLocalScalars.empty())) {
1054        return;
1055    }
1056    mLocalScalarPtr.resize(mLocalScalars.size());
1057    const auto end = mScalarMap.end();
1058    for (auto i = mScalarMap.begin(); i != end; ++i) {
1059        ScalarField & field = i->getValue();
1060        if (LLVM_UNLIKELY(field.Type == ScalarType::Local)) {
1061            const auto index = field.Index;
1062            const Binding & local = mLocalScalars[index];
1063            Value * const scalar = b->CreateAlloca(local.getType());
1064            b->CreateStore(ConstantAggregateZero::get(local.getType()), scalar);
1065            mLocalScalarPtr[index] = scalar;
1066        }
1067    }
1068}
1069
1070/** ------------------------------------------------------------------------------------------------------------- *
1071 * @brief getInputScalarBinding
1072 ** ------------------------------------------------------------------------------------------------------------- */
1073Binding & Kernel::getInputScalarBinding(const StringRef name) {
1074    const ScalarField & field = getScalarField(name);
1075    if (LLVM_UNLIKELY(field.Type != ScalarType::Input)) {
1076        report_fatal_error(getName() + "." + name + "is not an input scalar");
1077    }
1078    return mInputScalars[field.Index];
1079}
1080
1081/** ------------------------------------------------------------------------------------------------------------- *
1082 * @brief getOutputScalarBinding
1083 ** ------------------------------------------------------------------------------------------------------------- */
1084Binding & Kernel::getOutputScalarBinding(const StringRef name) {
1085    const ScalarField & field = getScalarField(name);
1086    if (LLVM_UNLIKELY(field.Type != ScalarType::Output)) {
1087        report_fatal_error(getName() + "." + name + "is not an output scalar");
1088    }
1089    return mOutputScalars[field.Index];
1090}
1091
1092/** ------------------------------------------------------------------------------------------------------------- *
1093 * @brief getStreamPort
1094 ** ------------------------------------------------------------------------------------------------------------- */
1095Kernel::StreamSetPort Kernel::getStreamPort(const StringRef name) const {
1096    const auto f = mStreamSetMap.find(name);
1097    if (LLVM_UNLIKELY(f == mStreamSetMap.end())) {
1098        assert (!"could not find stream set!");
1099        report_fatal_error(getName() + " does not contain stream set " + name);
1100    }
1101    return f->second;
1102}
1103
1104/** ------------------------------------------------------------------------------------------------------------- *
1105 * @brief getStreamBinding
1106 ** ------------------------------------------------------------------------------------------------------------- */
1107const Binding & Kernel::getStreamBinding(const StringRef name) const {
1108    Port port; unsigned index;
1109    std::tie(port, index) = getStreamPort(name);
1110    return (port == Port::Input) ? getInputStreamSetBinding(index) : getOutputStreamSetBinding(index);
1111}
1112
1113/** ------------------------------------------------------------------------------------------------------------- *
1114 * @brief getLowerBound
1115 ** ------------------------------------------------------------------------------------------------------------- */
1116RateValue Kernel::getLowerBound(const Binding & binding) const {
1117    const ProcessingRate & rate = binding.getRate();
1118    if (rate.hasReference()) {
1119        return rate.getLowerBound() * getLowerBound(getStreamBinding(rate.getReference()));
1120    } else {
1121        return rate.getLowerBound();
1122    }
1123}
1124
1125/** ------------------------------------------------------------------------------------------------------------- *
1126 * @brief getUpperBound
1127 ** ------------------------------------------------------------------------------------------------------------- */
1128RateValue Kernel::getUpperBound(const Binding & binding) const {
1129    const ProcessingRate & rate = binding.getRate();
1130    if (rate.hasReference()) {
1131        return rate.getUpperBound() * getUpperBound(getStreamBinding(rate.getReference()));
1132    } else {
1133        return rate.getUpperBound();
1134    }
1135}
1136
1137/** ------------------------------------------------------------------------------------------------------------- *
1138 * @brief isCountable
1139 ** ------------------------------------------------------------------------------------------------------------- */
1140bool Kernel::isCountable(const Binding & binding) const {
1141    if (LLVM_UNLIKELY(binding.isDeferred())) {
1142        return false;
1143    }
1144    const ProcessingRate & rate = binding.getRate();
1145    return rate.isFixed() || rate.isPopCount() || rate.isNegatedPopCount();
1146}
1147
1148/** ------------------------------------------------------------------------------------------------------------- *
1149 * @brief isAddressable
1150 ** ------------------------------------------------------------------------------------------------------------- */
1151bool Kernel::isAddressable(const Binding & binding) const {
1152    if (LLVM_UNLIKELY(binding.isDeferred())) {
1153        return true;
1154    }
1155    const ProcessingRate & rate = binding.getRate();
1156    return rate.isBounded() || rate.isUnknown();
1157}
1158
1159/** ------------------------------------------------------------------------------------------------------------- *
1160 * @brief requiresOverflow
1161 ** ------------------------------------------------------------------------------------------------------------- */
1162bool Kernel::requiresOverflow(const Binding & binding) const {
1163    const ProcessingRate & rate = binding.getRate();
1164    if (rate.isFixed() || binding.hasAttribute(AttrId::BlockSize)) {
1165        return false;
1166    } else if (rate.isRelative()) {
1167        return requiresOverflow(getStreamBinding(rate.getReference()));
1168    } else {
1169        return true;
1170    }
1171}
1172
1173/** ------------------------------------------------------------------------------------------------------------- *
1174 * @brief initializeBindings
1175 ** ------------------------------------------------------------------------------------------------------------- */
1176void Kernel::initializeBindings(BaseDriver & driver) {
1177
1178    for (unsigned i = 0; i < mInputScalars.size(); i++) {
1179        Binding & input = mInputScalars[i];
1180        addScalarToMap(input.getName(), ScalarType::Input, i);
1181        if (input.getRelationship() == nullptr) {
1182            input.setRelationship(driver.CreateScalar(input.getType()));
1183        }
1184    }
1185    for (unsigned i = 0; i < mInputStreamSets.size(); i++) {
1186        Binding & input = mInputStreamSets[i];
1187        if (LLVM_UNLIKELY(input.getRelationship() == nullptr)) {
1188            report_fatal_error(getName()+ "." + input.getName() + " must be set upon construction");
1189        }
1190        addStreamToMap(input.getName(), Port::Input, i);
1191    }
1192    for (unsigned i = 0; i < mOutputStreamSets.size(); i++) {
1193        Binding & output = mOutputStreamSets[i];
1194        if (LLVM_UNLIKELY(output.getRelationship() == nullptr)) {
1195            report_fatal_error(getName()+ "." + output.getName() + " must be set upon construction");
1196        }
1197        addStreamToMap(output.getName(), Port::Output, i);
1198    }
1199    for (unsigned i = 0; i < mInternalScalars.size(); i++) {
1200        const Binding & internal = mInternalScalars[i];
1201        addScalarToMap(internal.getName(), ScalarType::Internal, i);
1202    }
1203    for (unsigned i = 0; i < mOutputScalars.size(); i++) {
1204        Binding & output = mOutputScalars[i];
1205        addScalarToMap(output.getName(), ScalarType::Output, i);
1206        if (output.getRelationship() == nullptr) {
1207            output.setRelationship(driver.CreateScalar(output.getType()));
1208        }
1209    }
1210}
1211
1212/** ------------------------------------------------------------------------------------------------------------- *
1213 * @brief setInputStreamSetAt
1214 ** ------------------------------------------------------------------------------------------------------------- */
1215void Kernel::setInputStreamSetAt(const unsigned i, StreamSet * const value) {
1216    mInputStreamSets[i].setRelationship(value);
1217}
1218
1219/** ------------------------------------------------------------------------------------------------------------- *
1220 * @brief setOutputStreamSetAt
1221 ** ------------------------------------------------------------------------------------------------------------- */
1222void Kernel::setOutputStreamSetAt(const unsigned i, StreamSet * const value) {
1223    mOutputStreamSets[i].setRelationship(value);
1224}
1225
1226/** ------------------------------------------------------------------------------------------------------------- *
1227 * @brief setInputScalarAt
1228 ** ------------------------------------------------------------------------------------------------------------- */
1229void Kernel::setInputScalarAt(const unsigned i, Scalar * const value) {
1230    mInputScalars[i].setRelationship(value);
1231}
1232
1233/** ------------------------------------------------------------------------------------------------------------- *
1234 * @brief setOutputScalarAt
1235 ** ------------------------------------------------------------------------------------------------------------- */
1236void Kernel::setOutputScalarAt(const unsigned i, Scalar * const value) {
1237    mOutputScalars[i].setRelationship(value);
1238}
1239
1240/** ------------------------------------------------------------------------------------------------------------- *
1241 * @brief generateKernelMethod
1242 ** ------------------------------------------------------------------------------------------------------------- */
1243void SegmentOrientedKernel::generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
1244    generateDoSegmentMethod(b);
1245}
1246
1247/** ------------------------------------------------------------------------------------------------------------- *
1248 * @brief annotateKernelNameWithDebugFlags
1249 ** ------------------------------------------------------------------------------------------------------------- */
1250inline std::string annotateKernelNameWithDebugFlags(std::string && name) {
1251    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
1252        name += "_EA";
1253    }
1254    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
1255        name += "_MP";
1256    }
1257    name += "_O" + std::to_string((int)codegen::OptLevel);
1258    return name;
1259}
1260
1261/** ------------------------------------------------------------------------------------------------------------- *
1262 * @brief getDefaultFamilyName
1263 ** ------------------------------------------------------------------------------------------------------------- */
1264std::string Kernel::getDefaultFamilyName() const {
1265    std::string tmp;
1266    raw_string_ostream out(tmp);
1267    if (LLVM_LIKELY(isStateful())) {
1268        out << "F";
1269    } else {
1270        out << "L";
1271    }
1272    out << getStride();
1273    AttributeSet::print(out);
1274    for (const Binding & input : mInputScalars) {
1275        out << ",IV("; input.print(this, out); out << ')';
1276    }
1277    for (const Binding & input : mInputStreamSets) {
1278        out << ",IS("; input.print(this, out); out << ')';
1279    }
1280    for (const Binding & output : mOutputStreamSets) {
1281        out << ",OS("; output.print(this, out); out << ')';
1282    }
1283    for (const Binding & output : mOutputScalars) {
1284        out << ",OV("; output.print(this, out); out << ')';
1285    }
1286    out.flush();
1287    return tmp;
1288}
1289
1290// CONSTRUCTOR
1291Kernel::Kernel(const std::unique_ptr<KernelBuilder> & b,
1292               const TypeId typeId,
1293               std::string && kernelName,
1294               Bindings && stream_inputs,
1295               Bindings && stream_outputs,
1296               Bindings && scalar_inputs,
1297               Bindings && scalar_outputs,
1298               Bindings && internal_scalars)
1299: mIsGenerated(false)
1300, mHandle(nullptr)
1301, mModule(nullptr)
1302, mKernelStateType(nullptr)
1303, mInputStreamSets(std::move(stream_inputs))
1304, mOutputStreamSets(std::move(stream_outputs))
1305, mInputScalars(std::move(scalar_inputs))
1306, mOutputScalars(std::move(scalar_outputs))
1307, mInternalScalars( std::move(internal_scalars))
1308, mCurrentMethod(nullptr)
1309, mStride(b->getBitBlockWidth())
1310, mTerminationSignalPtr(nullptr)
1311, mIsFinal(nullptr)
1312, mNumOfStrides(nullptr)
1313, mKernelName(annotateKernelNameWithDebugFlags(std::move(kernelName)))
1314, mTypeId(typeId) {
1315
1316}
1317
1318Kernel::~Kernel() { }
1319
1320// CONSTRUCTOR
1321SegmentOrientedKernel::SegmentOrientedKernel(const std::unique_ptr<KernelBuilder> & b,
1322                                             std::string && kernelName,
1323                                             Bindings && stream_inputs,
1324                                             Bindings && stream_outputs,
1325                                             Bindings && scalar_parameters,
1326                                             Bindings && scalar_outputs,
1327                                             Bindings && internal_scalars)
1328: Kernel(b,
1329TypeId::SegmentOriented, std::move(kernelName),
1330std::move(stream_inputs), std::move(stream_outputs),
1331std::move(scalar_parameters), std::move(scalar_outputs),
1332std::move(internal_scalars)) {
1333
1334}
1335
1336
1337}
Note: See TracBrowser for help on using the repository browser.