source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 6209

Last change on this file since 6209 was 6209, checked in by nmedfort, 6 months ago

Initial cache janitor daemon test

File size: 47.9 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain/toolchain.h>
8#include <toolchain/driver.h>
9#include <kernels/relationship.h>
10#include <kernels/streamset.h>
11#include <kernels/kernel_builder.h>
12#include <llvm/IR/CallingConv.h>
13#include <llvm/IR/DerivedTypes.h>
14#include <llvm/IR/Constants.h>
15#include <llvm/IR/Function.h>
16#include <llvm/IR/Instructions.h>
17#include <llvm/IR/MDBuilder.h>
18#include <llvm/IR/Module.h>
19#include <llvm/Support/raw_ostream.h>
20#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(4, 0, 0)
21#include <llvm/Bitcode/ReaderWriter.h>
22#else
23#include <llvm/Bitcode/BitcodeWriter.h>
24#endif
25#include <llvm/Transforms/Utils/Local.h>
26#include <llvm/Support/Debug.h>
27#include <boost/uuid/sha1.hpp>
28#include <llvm/Support/Format.h>
29#include <sstream>
30
31using namespace llvm;
32using namespace boost;
33
34namespace kernel {
35
36using AttrId = Attribute::KindId;
37using RateValue = ProcessingRate::RateValue;
38using RateId = ProcessingRate::KindId;
39using StreamPort = Kernel::StreamSetPort;
40using Port = Kernel::Port;
41
42// TODO: make "namespaced" internal scalars that are automatically grouped into cache-aligned structs
43// within the kernel state to hide the complexity from the user?
44
// Suffixes appended to a kernel's name to form the names of its generated
// initialize, do-segment and terminate functions.
static const char * const INIT_SUFFIX = "_Init";
static const char * const DO_SEGMENT_SUFFIX = "_DoSegment";
static const char * const TERMINATE_SUFFIX = "_Terminate";
48
49/** ------------------------------------------------------------------------------------------------------------- *
50 * @brief setInstance
51 ** ------------------------------------------------------------------------------------------------------------- */
52void  Kernel::setHandle(const std::unique_ptr<KernelBuilder> & b, Value * const handle) {
53    assert ("handle cannot be null!" && handle);
54    assert ("handle must be a pointer!" && handle->getType()->isPointerTy());
55    assert ("handle must be a kernel state object!" && (handle->getType()->getPointerElementType() == mKernelStateType));
56    #ifndef NDEBUG
57    const Function * const handleFunction = isa<Argument>(handle) ? cast<Argument>(handle)->getParent() : cast<Instruction>(handle)->getParent()->getParent();
58    const Function * const builderFunction = b->GetInsertBlock()->getParent();
59    assert ("handle is not from the current function." && (handleFunction == builderFunction));
60    #endif
61    mHandle = handle;
62}
63
64/** ------------------------------------------------------------------------------------------------------------- *
65 * @brief isLocalBuffer
66 ** ------------------------------------------------------------------------------------------------------------- */
67inline bool isLocalBuffer(const Binding & output) {
68    return output.getRate().isUnknown() || output.hasAttribute(AttrId::ManagedBuffer);
69}
70
71/** ------------------------------------------------------------------------------------------------------------- *
72 * @brief addBaseKernelProperties
73 *
74 * Base kernel properties are those that the pipeline requires access to and must be in a fixed memory location.
75 ** ------------------------------------------------------------------------------------------------------------- */
76void Kernel::addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & b) {
77
78    // Set the default kernel stride.
79    if (mStride == 0) {
80        mStride = b->getBitBlockWidth();
81    }
82
83    // TODO: if a stream has an Expandable or ManagedBuffer attribute or is produced at an Unknown rate,
84    // the pipeline ought to pass the stream as a DynamicBuffer. This will require some coordination between
85    // the pipeline and kernel to ensure both have a consistent view of the buffer and that if either expands,
86    // any other kernel that is (simultaneously) reading from the buffer is unaffected.
87
88    mStreamSetInputBuffers.clear();
89    const auto numOfInputStreams = mInputStreamSets.size();
90    mStreamSetInputBuffers.reserve(numOfInputStreams);
91    for (unsigned i = 0; i < numOfInputStreams; ++i) {
92        const Binding & input = mInputStreamSets[i];
93        mStreamSetInputBuffers.emplace_back(new ExternalBuffer(b, input.getType()));
94    }
95
96    mStreamSetOutputBuffers.clear();
97    const auto numOfOutputStreams = mOutputStreamSets.size();
98    mStreamSetOutputBuffers.reserve(numOfOutputStreams);
99    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
100        const Binding & output = mOutputStreamSets[i];
101        mStreamSetOutputBuffers.emplace_back(new ExternalBuffer(b, output.getType()));
102    }
103
104    IntegerType * const sizeTy = b->getSizeTy();
105    PointerType * const sizePtrPtrTy = sizeTy->getPointerTo()->getPointerTo();
106
107    addInternalScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
108    addInternalScalar(sizeTy, TERMINATION_SIGNAL);
109
110    // TODO: if we had a way of easily calculating the number of processed/produced items of the
111    // final stride of a non-deferred fixed rate stream, we could avoid storing the item counts.
112    for (unsigned i = 0; i < numOfInputStreams; ++i) {
113        const Binding & input = mInputStreamSets[i];
114        addInternalScalar(sizeTy, input.getName() + PROCESSED_ITEM_COUNT_SUFFIX);
115        if (LLVM_UNLIKELY(input.isDeferred())) {
116            addInternalScalar(sizeTy, input.getName() + NON_DEFERRED_ITEM_COUNT_SUFFIX);
117        }
118    }
119
120    // If an output is a managed buffer, we need to store both the buffer and a set of consumers.
121    Type * const consumerSetTy = StructType::get(b->getContext(), {sizeTy, sizePtrPtrTy})->getPointerTo();
122    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
123        const Binding & output = mOutputStreamSets[i];
124        addInternalScalar(sizeTy, output.getName() + PRODUCED_ITEM_COUNT_SUFFIX);
125        if (LLVM_UNLIKELY(output.isDeferred())) {
126            addInternalScalar(sizeTy, output.getName() + NON_DEFERRED_ITEM_COUNT_SUFFIX);
127        }
128        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
129            Type * const handleTy = mStreamSetOutputBuffers[i]->getHandleType(b);
130            addInternalScalar(handleTy, output.getName() + BUFFER_HANDLE_SUFFIX);
131            addInternalScalar(consumerSetTy, output.getName() + CONSUMER_SUFFIX);
132            addInternalScalar(sizeTy, output.getName() + CONSUMED_ITEM_COUNT_SUFFIX);
133        }
134    }
135
136    // We compile in a 64-bit CPU cycle counter into every kernel.   It will remain unused
137    // in normal execution, but when codegen::EnableCycleCounter is specified, pipelines
138    // will be able to add instrumentation to cached modules without recompilation.
139    addInternalScalar(b->getInt64Ty(), CYCLECOUNT_SCALAR);
140
141}
142
143/** ------------------------------------------------------------------------------------------------------------- *
144 * @brief addScalarToMap
145 ** ------------------------------------------------------------------------------------------------------------- */
146void Kernel::addScalarToMap(const llvm::StringRef name, const ScalarType scalarType, const unsigned index) {
147    const auto r = mScalarMap.insert(std::make_pair(name, ScalarField{scalarType, index}));
148    if (LLVM_UNLIKELY(!r.second)) {
149        const ScalarField & sf = r.first->second;
150        if (LLVM_UNLIKELY(sf.type != scalarType || sf.index != index)) {
151            report_fatal_error(getName() + " already contains scalar " + name);
152        }
153    }
154}
155
156/** ------------------------------------------------------------------------------------------------------------- *
157 * @brief addScalarToMap
158 ** ------------------------------------------------------------------------------------------------------------- */
159void Kernel::addStreamToMap(const llvm::StringRef name, const Port port, const unsigned index) {
160    const auto r = mStreamSetMap.insert(std::make_pair(name, std::make_pair(port, index)));
161    if (LLVM_UNLIKELY(!r.second)) {
162        const StreamPort & sf = r.first->second;
163        if (LLVM_UNLIKELY(sf.first != port || sf.second != index)) {
164            report_fatal_error(getName() + " already contains stream " + name);
165        }
166    }
167}
168
169/** ------------------------------------------------------------------------------------------------------------- *
170 * @brief addKernelDeclarations
171 ** ------------------------------------------------------------------------------------------------------------- */
172void Kernel::addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b) {
173    if (mKernelStateType == nullptr) {
174        throw std::runtime_error("Kernel state definition " + getName() + " has not been finalized.");
175    }
176    addInitializeDeclaration(b);
177    addDoSegmentDeclaration(b);
178    addFinalizeDeclaration(b);
179    linkExternalMethods(b);
180}
181
182/** ------------------------------------------------------------------------------------------------------------- *
183 * @brief addInitializeDeclaration
184 ** ------------------------------------------------------------------------------------------------------------- */
185void Kernel::addInitializeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
186
187    std::vector<Type *> params;
188    params.push_back(mKernelStateType->getPointerTo());
189    for (const Binding & binding : mInputScalars) {
190        params.push_back(binding.getType());
191    }
192
193    FunctionType * const initType = FunctionType::get(b->getVoidTy(), params, false);
194    Function * const initFunc = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, b->getModule());
195    initFunc->setCallingConv(CallingConv::C);
196    initFunc->setDoesNotThrow();
197    auto args = initFunc->arg_begin();
198    args->setName("self");
199    for (const Binding & binding : mInputScalars) {
200        (++args)->setName(binding.getName());
201    }
202
203    assert (std::next(args) == initFunc->arg_end());
204}
205
206/** ------------------------------------------------------------------------------------------------------------- *
207 * @brief callGenerateInitializeMethod
208 ** ------------------------------------------------------------------------------------------------------------- */
209void Kernel::callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
210    const Kernel * const storedKernel = b->getKernel();
211    b->setKernel(this);
212    Value * const storedHandle = getHandle();
213    mCurrentMethod = getInitFunction(b->getModule());
214    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
215    auto args = mCurrentMethod->arg_begin();
216    setHandle(b, &*args);
217    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
218        b->CreateMProtect(mHandle, CBuilder::Protect::WRITE);
219    }
220    b->CreateStore(ConstantAggregateZero::get(mKernelStateType), getHandle());
221    for (const auto & binding : mInputScalars) {
222        b->setScalarField(binding.getName(), &*(++args));
223    }
224
225    const auto numOfOutputs = mOutputStreamSets.size();
226    for (unsigned i = 0; i < numOfOutputs; i++) {
227        const Binding & output = mOutputStreamSets[i];
228        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
229            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
230            mStreamSetOutputBuffers[i]->setHandle(b, handle);
231        }
232    }
233    generateInitializeMethod(b);
234    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
235        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
236    }
237    b->CreateRetVoid();
238    b->setKernel(storedKernel);
239    mHandle = storedHandle;
240    mCurrentMethod = nullptr;
241}
242
243/** ------------------------------------------------------------------------------------------------------------- *
244 * @brief addDoSegmentDeclaration
245 ** ------------------------------------------------------------------------------------------------------------- */
246void Kernel::addDoSegmentDeclaration(const std::unique_ptr<KernelBuilder> & b) {
247
248    IntegerType * const sizeTy = b->getSizeTy();
249    PointerType * const sizePtrTy = sizeTy->getPointerTo();
250    Type * const voidTy = b->getVoidTy();
251
252    std::vector<Type *> params;
253    params.reserve(2 + mInputStreamSets.size() + mOutputStreamSets.size());
254    params.push_back(mKernelStateType->getPointerTo());  // self
255    params.push_back(sizeTy); // numOfStrides
256    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
257        Type * const bufferType = mStreamSetInputBuffers[i]->getType();
258        params.push_back(bufferType->getPointerTo()); // logical "base" input address
259        params.push_back(sizeTy);  // accessible input items (after non-deferred processed item count)
260        const Binding & input = mInputStreamSets[i];
261        unsigned numOfPopCountArrays = 0;
262        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
263            ++numOfPopCountArrays;
264        }
265        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
266            ++numOfPopCountArrays;
267        }
268        if (numOfPopCountArrays) {
269            params.insert(params.end(), numOfPopCountArrays, sizePtrTy); // popCountRef array (length is numOfStrides)
270        }
271    }
272    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
273        const Binding & output = mOutputStreamSets[i];
274        if (LLVM_LIKELY(!isLocalBuffer(output))) {
275            Type * const bufferType = mStreamSetOutputBuffers[i]->getType();
276            params.push_back(bufferType->getPointerTo()); // logical "base" output address
277            params.push_back(sizeTy); // writable output items (after non-deferred produced item count)
278        }
279    }
280
281    FunctionType * const doSegmentType = FunctionType::get(voidTy, params, false);
282    Function * const doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, b->getModule());
283    doSegment->setCallingConv(CallingConv::C);
284    doSegment->setDoesNotThrow();
285    auto args = doSegment->arg_begin();
286    args->setName("self");
287    (++args)->setName("numOfStrides");
288    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
289        const Binding & input = mInputStreamSets[i];
290        (++args)->setName(input.getName());
291        (++args)->setName(input.getName() + "_accessible");
292        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
293            (++args)->setName(input.getName() + "_popCountArray");
294        }
295        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
296            (++args)->setName(input.getName() + "_negatedPopCountArray");
297        }
298    }
299    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
300        const Binding & output = mOutputStreamSets[i];
301        if (LLVM_LIKELY(!isLocalBuffer(output))) {
302            (++args)->setName(output.getName());
303            (++args)->setName(output.getName() + "_writable");
304        }
305    }
306    assert (std::next(args) == doSegment->arg_end());
307}
308
309/** ------------------------------------------------------------------------------------------------------------- *
310 * @brief callGenerateKernelMethod
311 ** ------------------------------------------------------------------------------------------------------------- */
312void Kernel::callGenerateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
313
314    assert (mInputStreamSets.size() == mStreamSetInputBuffers.size());
315    assert (mOutputStreamSets.size() == mStreamSetOutputBuffers.size());
316
317    const Kernel * const storedKernel = b->getKernel();
318    b->setKernel(this);
319    Value * const storedHandle = getHandle();
320    mCurrentMethod = getDoSegmentFunction(b->getModule());
321    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
322    auto args = mCurrentMethod->arg_begin();
323    setHandle(b, &*(args++));
324    mNumOfStrides = &*(args++);
325    mIsFinal = b->CreateIsNull(mNumOfStrides);
326    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
327        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
328    }
329    // NOTE: the disadvantage of passing the stream pointers as a parameter is that it becomes more difficult
330    // to access a stream set from a LLVM function call. We could create a stream-set aware function creation
331    // and call system here but that is not an ideal way of handling this.
332
333    // TODO: use a graph to depict relations between binding? It would be better to first move to a model
334    // where inputs and outputs are contained in a single parameter vector.
335
336    const auto numOfInputs = getNumOfStreamInputs();
337    mAccessibleInputItems.resize(numOfInputs, nullptr);
338    mAvailableInputItems.resize(numOfInputs, nullptr);
339    mPopCountRateArray.resize(numOfInputs, nullptr);
340    mNegatedPopCountRateArray.resize(numOfInputs, nullptr);
341    for (unsigned i = 0; i < numOfInputs; i++) {
342        const Binding & input = mInputStreamSets[i];
343        assert (args != mCurrentMethod->arg_end());
344        Value * const addr = &*(args++);
345        auto & buffer = mStreamSetInputBuffers[i];
346        Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
347        buffer->setHandle(b, localHandle);
348        buffer->setBaseAddress(b.get(), addr);
349        assert (args != mCurrentMethod->arg_end());
350        Value * const accessible = &*(args++);
351        mAccessibleInputItems[i] = accessible;
352        Value * const processed = b->getNonDeferredProcessedItemCount(input);
353        Value * capacity = b->CreateAdd(processed, accessible);
354        mAvailableInputItems[i] = capacity;
355        if (input.hasLookahead()) {
356            capacity = b->CreateAdd(capacity, b->getSize(input.getLookahead()));
357        }
358        buffer->setCapacity(b.get(), capacity);
359        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
360            assert (args != mCurrentMethod->arg_end());
361            mPopCountRateArray[i] = &*(args++);
362        }
363        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
364            assert (args != mCurrentMethod->arg_end());
365            mNegatedPopCountRateArray[i] = &*(args++);
366        }
367    }
368
369    // set all of the output buffers
370    const auto numOfOutputs = getNumOfStreamOutputs();
371    mWritableOutputItems.resize(numOfOutputs, nullptr);
372    for (unsigned i = 0; i < numOfOutputs; i++) {
373        // If an output is a managed buffer, the address is stored within the state instead
374        // of being passed in through the function call.
375        auto & buffer = mStreamSetOutputBuffers[i];
376        const Binding & output = mOutputStreamSets[i];
377        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
378            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
379            buffer->setHandle(b, handle);
380        } else {
381            assert (args != mCurrentMethod->arg_end());
382            Value * const logicalBaseAddress = &*(args++);
383            Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
384            buffer->setHandle(b, localHandle);
385            buffer->setBaseAddress(b.get(), logicalBaseAddress);
386            assert (args != mCurrentMethod->arg_end());
387            Value * const writable = &*(args++);
388            mWritableOutputItems[i] = writable;
389            Value * const produced = b->getNonDeferredProducedItemCount(output);
390            Value * const capacity = b->CreateAdd(produced, writable);
391            buffer->setCapacity(b.get(), capacity);
392        }
393    }
394    assert (args == mCurrentMethod->arg_end());
395
396    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
397        Value * const terminated = b->getTerminationSignal();
398        b->CreateAssert(b->CreateNot(terminated), getName() + " was called after termination");
399    }
400
401    // Calculate and/or load the accessible and writable item counts. If they are unneeded,
402    // LLVM ought to recognize them as dead code and remove them.
403    generateKernelMethod(b); // must be overridden by the Kernel subtype
404    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
405        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
406    }
407    b->CreateRetVoid();
408
409    // Clean up all of the constructed buffers.
410    b->setKernel(storedKernel);
411    mHandle = storedHandle;
412    mCurrentMethod = nullptr;
413    mIsFinal = nullptr;
414    mNumOfStrides = nullptr;
415    mAccessibleInputItems.clear();
416    mPopCountRateArray.clear();
417    mNegatedPopCountRateArray.clear();
418}
419
420/** ------------------------------------------------------------------------------------------------------------- *
421 * @brief addFinalizeDeclaration
422 ** ------------------------------------------------------------------------------------------------------------- */
423void Kernel::addFinalizeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
424    Type * resultType = nullptr;
425    if (mOutputScalars.empty()) {
426        resultType = b->getVoidTy();
427    } else {
428        const auto n = mOutputScalars.size();
429        Type * outputType[n];
430        for (unsigned i = 0; i < n; ++i) {
431            outputType[i] = mOutputScalars[i].getType();
432        }
433        if (n == 1) {
434            resultType = outputType[0];
435        } else {
436            resultType = StructType::get(b->getContext(), ArrayRef<Type *>(outputType, n));
437        }
438    }
439    PointerType * const selfType = mKernelStateType->getPointerTo();
440    FunctionType * const terminateType = FunctionType::get(resultType, {selfType}, false);
441    Function * const terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, b->getModule());
442    terminateFunc->setCallingConv(CallingConv::C);
443    terminateFunc->setDoesNotThrow();
444    auto args = terminateFunc->arg_begin();
445    args->setName("self");
446    assert (std::next(args) == terminateFunc->arg_end());
447}
448
449/** ------------------------------------------------------------------------------------------------------------- *
450 * @brief callGenerateFinalizeMethod
451 ** ------------------------------------------------------------------------------------------------------------- */
452void Kernel::callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
453
454    const Kernel * const storedKernel = b->getKernel();
455    b->setKernel(this);
456    mCurrentMethod = getTerminateFunction(b->getModule());
457    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
458    auto args = mCurrentMethod->arg_begin();
459    setHandle(b, &*(args++));
460    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
461        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
462    }
463    const auto numOfOutputs = mOutputStreamSets.size();
464    for (unsigned i = 0; i < numOfOutputs; i++) {
465        const Binding & output = mOutputStreamSets[i];
466        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
467            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
468            mStreamSetOutputBuffers[i]->setHandle(b, handle);
469        }
470    }
471
472    generateFinalizeMethod(b); // may be overridden by the Kernel subtype
473    const auto outputs = getFinalOutputScalars(b);
474    b->CreateFree(mHandle);
475    mHandle = nullptr;
476
477    if (outputs.empty()) {
478        b->CreateRetVoid();
479    } else {
480        const auto n = outputs.size();
481        if (n == 1) {
482            b->CreateRet(outputs[0]);
483        } else {
484            b->CreateAggregateRet(outputs.data(), n);
485        }
486    }
487
488    b->setKernel(storedKernel);
489    mCurrentMethod = nullptr;
490}
491
492/** ------------------------------------------------------------------------------------------------------------- *
493 * @brief callGenerateFinalizeMethod
494 ** ------------------------------------------------------------------------------------------------------------- */
495std::vector<Value *> Kernel::getFinalOutputScalars(const std::unique_ptr<KernelBuilder> & b) {
496    const auto n = mOutputScalars.size();
497    std::vector<Value *> outputs(n);
498    for (unsigned i = 0; i < n; ++i) {
499        outputs[i] = b->getScalarField(mOutputScalars[i].getName());
500    }
501    return outputs;
502}
503
504/** ------------------------------------------------------------------------------------------------------------- *
505 * @brief getCacheName
506 ** ------------------------------------------------------------------------------------------------------------- */
507std::string Kernel::getCacheName(const std::unique_ptr<KernelBuilder> & b) const {
508    std::stringstream cacheName;
509    cacheName << getName() << '_' << b->getBuilderUniqueName();
510    return cacheName.str();
511}
512
513/** ------------------------------------------------------------------------------------------------------------- *
514 * @brief setModule
515 ** ------------------------------------------------------------------------------------------------------------- */
516Module * Kernel::setModule(Module * const module) {
517    assert (mModule == nullptr || mModule == module);
518    assert (module != nullptr);
519    mModule = module;
520    return mModule;
521}
522
523/** ------------------------------------------------------------------------------------------------------------- *
524 * @brief makeModule
525 ** ------------------------------------------------------------------------------------------------------------- */
526Module * Kernel::makeModule(const std::unique_ptr<KernelBuilder> & b) {
527    Module * m = new Module(getCacheName(b), b->getContext());
528    m->setTargetTriple(b->getModule()->getTargetTriple());
529    m->setDataLayout(b->getModule()->getDataLayout());
530    return setModule(m);
531}
532
533
534/** ------------------------------------------------------------------------------------------------------------- *
535 * @brief getInitFunction
536 ** ------------------------------------------------------------------------------------------------------------- */
537Function * Kernel::getInitFunction(Module * const module) const {
538    const auto name = getName() + INIT_SUFFIX;
539    Function * f = module->getFunction(name);
540    if (LLVM_UNLIKELY(f == nullptr)) {
541        report_fatal_error("Cannot find " + name);
542    }
543    return f;
544}
545
546/** ------------------------------------------------------------------------------------------------------------- *
547 * @brief getDoSegmentFunction
548 ** ------------------------------------------------------------------------------------------------------------- */
549Function * Kernel::getDoSegmentFunction(Module * const module) const {
550    const auto name = getName() + DO_SEGMENT_SUFFIX;
551    Function * f = module->getFunction(name);
552    if (LLVM_UNLIKELY(f == nullptr)) {
553        report_fatal_error("Cannot find " + name);
554    }
555    return f;
556}
557
558/** ------------------------------------------------------------------------------------------------------------- *
559 * @brief getTerminateFunction
560 ** ------------------------------------------------------------------------------------------------------------- */
561Function * Kernel::getTerminateFunction(Module * const module) const {
562    const auto name = getName() + TERMINATE_SUFFIX;
563    Function * f = module->getFunction(name);
564    if (LLVM_UNLIKELY(f == nullptr)) {
565        report_fatal_error("Cannot find " + name);
566    }
567    return f;
568}
569
570/** ------------------------------------------------------------------------------------------------------------- *
571 * @brief prepareKernel
572 ** ------------------------------------------------------------------------------------------------------------- */
573void Kernel::prepareKernel(const std::unique_ptr<KernelBuilder> & b) {
574    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
575        report_fatal_error(getName() + ": cannot prepare kernel after kernel state finalized");
576    }
577    addBaseKernelProperties(b);
578    addInternalKernelProperties(b);
579    // NOTE: StructType::create always creates a new type even if an identical one exists.
580    if (LLVM_UNLIKELY(mModule == nullptr)) {
581        makeModule(b);
582    }
583    mKernelStateType = mModule->getTypeByName(getName());
584
585
586    if (LLVM_LIKELY(mKernelStateType == nullptr)) {
587        std::vector<llvm::Type *> fields;
588        fields.reserve(mInputScalars.size() + mOutputScalars.size() + mInternalScalars.size());
589        for (const Binding & scalar : mInputScalars) {
590            assert (scalar.getType());
591            fields.push_back(scalar.getType());
592        }
593        for (const Binding & scalar : mOutputScalars) {
594            assert (scalar.getType());
595            fields.push_back(scalar.getType());
596        }
597        for (const Binding & scalar : mInternalScalars) {
598            assert (scalar.getType());
599            fields.push_back(scalar.getType());
600        }
601        mKernelStateType = StructType::create(b->getContext(), fields, getName());
602    }
603
604
605
606
607    assert (isa<StructType>(mKernelStateType));
608}
609
610/** ------------------------------------------------------------------------------------------------------------- *
611 * @brief addInternalScalar
612 ** ------------------------------------------------------------------------------------------------------------- */
613void Kernel::addInternalScalar(llvm::Type * type, const llvm::StringRef name) {
614    const auto index = mInternalScalars.size();
615    mInternalScalars.emplace_back(type, name);
616    addScalarToMap(name, ScalarType::Internal, index);
617}
618
619/** ------------------------------------------------------------------------------------------------------------- *
620 * @brief getScalarIndex
621 ** ------------------------------------------------------------------------------------------------------------- */
622unsigned Kernel::getScalarIndex(const llvm::StringRef fieldName) const {
623    const auto & field = getScalarField(fieldName);
624    assert (mKernelStateType);
625    unsigned index = field.index;
626    switch (field.type) {
627        case ScalarType::Internal:
628            index += mOutputScalars.size();
629        case ScalarType::Output:
630            index += mInputScalars.size();
631        case ScalarType::Input:
632            break;
633    }
634    assert (index < mKernelStateType->getStructNumElements());
635    return index;
636}
637
638/** ------------------------------------------------------------------------------------------------------------- *
639 * @brief prepareCachedKernel
640 ** ------------------------------------------------------------------------------------------------------------- */
641void Kernel::prepareCachedKernel(const std::unique_ptr<KernelBuilder> & b) {
642    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
643        report_fatal_error(getName() + ": cannot prepare kernel after kernel state finalized");
644    }
645    addBaseKernelProperties(b);
646    mKernelStateType = getModule()->getTypeByName(getName());
647    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
648        report_fatal_error("Kernel definition for " + getName() + " was not found in the cache!");
649    }
650    assert (isa<StructType>(mKernelStateType));
651}
652
653/** ------------------------------------------------------------------------------------------------------------- *
654 * @brief makeSignature
655 *
656 * Default kernel signature: generate the IR and emit as byte code.
657 ** ------------------------------------------------------------------------------------------------------------- */
658std::string Kernel::makeSignature(const std::unique_ptr<KernelBuilder> & b) {
659    if (LLVM_UNLIKELY(hasSignature())) {
660        generateKernel(b);
661        std::string tmp;
662        raw_string_ostream signature(tmp);
663        WriteBitcodeToFile(getModule(), signature);
664        return signature.str();
665    } else {
666        return getModule()->getModuleIdentifier();
667    }
668}
669
670/** ------------------------------------------------------------------------------------------------------------- *
671 * @brief getStringHash
672 *
673 * Create a fixed length string hash of the given str
674 ** ------------------------------------------------------------------------------------------------------------- */
675std::string Kernel::getStringHash(const llvm::StringRef str) {
676
677    uint32_t digest[5]; // 160 bits in total
678    boost::uuids::detail::sha1 sha1;
679    sha1.process_bytes(str.data(), str.size());
680    sha1.get_digest(digest);
681
682    std::string buffer;
683    buffer.reserve((5 * 8) + 1);
684    raw_string_ostream out(buffer);
685    for (unsigned i = 0; i < 5; ++i) {
686        out << format_hex_no_prefix(digest[i], 8);
687    }
688    out.flush();
689
690    return buffer;
691}
692
693/** ------------------------------------------------------------------------------------------------------------- *
694 * @brief createInstance
695 ** ------------------------------------------------------------------------------------------------------------- */
696Value * Kernel::createInstance(const std::unique_ptr<KernelBuilder> & b) {
697    assert (mKernelStateType && "cannot create instance before calling prepareKernel() or prepareCachedKernel()");
698    Constant * const size = ConstantExpr::getSizeOf(mKernelStateType);
699    Value * handle = nullptr;
700    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
701        handle = b->CreateAlignedMalloc(size, b->getPageSize());
702        b->CreateMProtect(handle, size, CBuilder::Protect::READ);
703    } else {
704        handle = b->CreateAlignedMalloc(size, b->getCacheAlignment());
705    }
706//    mHandle = b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
707//    return mHandle;
708    return b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
709}
710
711/** ------------------------------------------------------------------------------------------------------------- *
712 * @brief initializeInstance
713 ** ------------------------------------------------------------------------------------------------------------- */
714void Kernel::initializeInstance(const std::unique_ptr<KernelBuilder> & b, std::vector<Value *> &args) {
715    assert (args.size() == getNumOfScalarInputs() + 1);
716    assert (args[0] && "cannot initialize before creation");
717    assert (args[0]->getType()->getPointerElementType() == mKernelStateType);
718    b->setKernel(this);
719    Function * const init = getInitFunction(b->getModule());
720    b->CreateCall(init, args);
721}
722
723/** ------------------------------------------------------------------------------------------------------------- *
724 * @brief generateKernel
725 ** ------------------------------------------------------------------------------------------------------------- */
726void Kernel::generateKernel(const std::unique_ptr<KernelBuilder> & b) {
727    if (LLVM_UNLIKELY(mIsGenerated)) return;
728    b->setKernel(this);
729    b->setModule(mModule);
730    addKernelDeclarations(b);
731    callGenerateInitializeMethod(b);
732    callGenerateKernelMethod(b);
733    callGenerateFinalizeMethod(b);
734    addAdditionalFunctions(b);
735    mIsGenerated = true;
736}
737
738/** ------------------------------------------------------------------------------------------------------------- *
739 * @brief finalizeInstance
740 ** ------------------------------------------------------------------------------------------------------------- */
741Value * Kernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & b) {
742    assert (mHandle && "was not set");
743    Value * result = b->CreateCall(getTerminateFunction(b->getModule()), { mHandle });
744    mHandle = nullptr;
745    if (mOutputScalars.empty()) {
746        assert (!result || result->getType()->isVoidTy());
747        result = nullptr;
748    }
749    return result;
750
751}
752
753/** ------------------------------------------------------------------------------------------------------------- *
754 * @brief getScalarField
755 ** ------------------------------------------------------------------------------------------------------------- */
756const Kernel::ScalarField & Kernel::getScalarField(const llvm::StringRef name) const {
757    assert (!mScalarMap.empty());
758    const auto f = mScalarMap.find(name);
759    if (LLVM_UNLIKELY(f == mScalarMap.end())) {
760        report_fatal_error(getName() + " does not contain scalar: " + name);
761    }
762    return f->second;
763}
764
765/** ------------------------------------------------------------------------------------------------------------- *
766 * @brief getInputScalarBinding
767 ** ------------------------------------------------------------------------------------------------------------- */
768Binding & Kernel::getInputScalarBinding(const llvm::StringRef name) {
769    const ScalarField & field = getScalarField(name);
770    if (LLVM_UNLIKELY(field.type != ScalarType::Input)) {
771        report_fatal_error(getName() + "." + name + "is not an input scalar");
772    }
773    return mInputScalars[field.index];
774}
775
776/** ------------------------------------------------------------------------------------------------------------- *
777 * @brief getOutputScalarBinding
778 ** ------------------------------------------------------------------------------------------------------------- */
779Binding & Kernel::getOutputScalarBinding(const llvm::StringRef name) {
780    const ScalarField & field = getScalarField(name);
781    if (LLVM_UNLIKELY(field.type != ScalarType::Output)) {
782        report_fatal_error(getName() + "." + name + "is not an output scalar");
783    }
784    return mOutputScalars[field.index];
785}
786
787/** ------------------------------------------------------------------------------------------------------------- *
788 * @brief getStreamPort
789 ** ------------------------------------------------------------------------------------------------------------- */
790Kernel::StreamSetPort Kernel::getStreamPort(const llvm::StringRef name) const {
791    const auto f = mStreamSetMap.find(name);
792    if (LLVM_UNLIKELY(f == mStreamSetMap.end())) {
793        assert (!mStreamSetMap.empty());
794        report_fatal_error(getName() + " does not contain stream set " + name);
795    }
796    return f->second;
797}
798
799/** ------------------------------------------------------------------------------------------------------------- *
 * @brief getStreamBinding
801 ** ------------------------------------------------------------------------------------------------------------- */
802const Binding & Kernel::getStreamBinding(const llvm::StringRef name) const {
803    Port port; unsigned index;
804    std::tie(port, index) = getStreamPort(name);
805    return (port == Port::Input) ? getInputStreamSetBinding(index) : getOutputStreamSetBinding(index);
806}
807
808/** ------------------------------------------------------------------------------------------------------------- *
809 * @brief getLowerBound
810 ** ------------------------------------------------------------------------------------------------------------- */
811RateValue Kernel::getLowerBound(const Binding & binding) const {
812    const ProcessingRate & rate = binding.getRate();
813    if (rate.hasReference()) {
814        return rate.getLowerBound() * getLowerBound(getStreamBinding(rate.getReference()));
815    } else {
816        return rate.getLowerBound();
817    }
818}
819
820/** ------------------------------------------------------------------------------------------------------------- *
821 * @brief getUpperBound
822 ** ------------------------------------------------------------------------------------------------------------- */
823RateValue Kernel::getUpperBound(const Binding & binding) const {
824    const ProcessingRate & rate = binding.getRate();
825    if (rate.hasReference()) {
826        return rate.getUpperBound() * getUpperBound(getStreamBinding(rate.getReference()));
827    } else {
828        return rate.getUpperBound();
829    }
830}
831
832/** ------------------------------------------------------------------------------------------------------------- *
833 * @brief isCountable
834 ** ------------------------------------------------------------------------------------------------------------- */
835bool Kernel::isCountable(const Binding & binding) const {
836    const ProcessingRate & rate = binding.getRate();
837    if (rate.isFixed() || rate.isPopCount() || rate.isNegatedPopCount()) {
838        return true;
839//    } else if (rate.isRelative()) {
840//        return isCountable(getStreamBinding(rate.getReference()));
841    } else {
842        return false;
843    }
844}
845
846/** ------------------------------------------------------------------------------------------------------------- *
847 * @brief isCalculable
848 ** ------------------------------------------------------------------------------------------------------------- */
849bool Kernel::isCalculable(const Binding & binding) const {
850    const ProcessingRate & rate = binding.getRate();
851    if (rate.isFixed() || rate.isBounded()) {
852        return true;
853    } else if (rate.isRelative()) {
854        return isCalculable(getStreamBinding(rate.getReference()));
855    } else {
856        return false;
857    }
858}
859
860/** ------------------------------------------------------------------------------------------------------------- *
861 * @brief requiresOverflow
862 ** ------------------------------------------------------------------------------------------------------------- */
863bool Kernel::requiresOverflow(const Binding & binding) const {
864    const ProcessingRate & rate = binding.getRate();
865    if (rate.isFixed() || binding.hasAttribute(AttrId::BlockSize)) {
866        return false;
867    } else if (rate.isRelative()) {
868        return requiresOverflow(getStreamBinding(rate.getReference()));
869    } else {
870        return true;
871    }
872}
873
874/** ------------------------------------------------------------------------------------------------------------- *
875 * @brief isUnknownRate
876 ** ------------------------------------------------------------------------------------------------------------- */
877bool Kernel::isUnknownRate(const Binding & binding) const {
878    const ProcessingRate & rate = binding.getRate();
879    if (rate.isUnknown()) {
880        return true;
881    } else if (rate.isRelative()) {
882        return isUnknownRate(getStreamBinding(rate.getReference()));
883    } else {
884        return false;
885    }
886}
887
888/** ------------------------------------------------------------------------------------------------------------- *
889 * @brief initializeBindings
890 ** ------------------------------------------------------------------------------------------------------------- */
891void Kernel::initializeBindings(BaseDriver & driver) {
892
893    for (unsigned i = 0; i < mInputScalars.size(); i++) {
894        Binding & input = mInputScalars[i];
895        addScalarToMap(input.getName(), ScalarType::Input, i);
896        if (input.getRelationship() == nullptr) {
897            input.setRelationship(driver.CreateScalar(input.getType()));
898        }
899    }
900    for (unsigned i = 0; i < mInputStreamSets.size(); i++) {
901        Binding & input = mInputStreamSets[i];
902        if (LLVM_UNLIKELY(input.getRelationship() == nullptr)) {
903            report_fatal_error(getName()+ "." + input.getName() + " must be set upon construction");
904        }
905        addStreamToMap(input.getName(), Port::Input, i);
906    }
907    for (unsigned i = 0; i < mOutputStreamSets.size(); i++) {
908        Binding & output = mOutputStreamSets[i];
909        if (LLVM_UNLIKELY(output.getRelationship() == nullptr)) {
910            report_fatal_error(getName()+ "." + output.getName() + " must be set upon construction");
911        }
912        addStreamToMap(output.getName(), Port::Output, i);
913    }
914    for (unsigned i = 0; i < mInternalScalars.size(); i++) {
915        const Binding & internal = mInternalScalars[i];
916        addScalarToMap(internal.getName(), ScalarType::Internal, i);
917    }
918    for (unsigned i = 0; i < mOutputScalars.size(); i++) {
919        Binding & output = mOutputScalars[i];
920        addScalarToMap(output.getName(), ScalarType::Output, i);
921        if (output.getRelationship() == nullptr) {
922            output.setRelationship(driver.CreateScalar(output.getType()));
923        }
924    }
925}
926
927/** ------------------------------------------------------------------------------------------------------------- *
928 * @brief setInputStreamSetAt
929 ** ------------------------------------------------------------------------------------------------------------- */
930void Kernel::setInputStreamSetAt(const unsigned i, StreamSet * const value) {
931    mInputStreamSets[i].setRelationship(value);
932}
933
934/** ------------------------------------------------------------------------------------------------------------- *
935 * @brief setOutputStreamSetAt
936 ** ------------------------------------------------------------------------------------------------------------- */
937void Kernel::setOutputStreamSetAt(const unsigned i, StreamSet * const value) {
938    mOutputStreamSets[i].setRelationship(value);
939}
940
941/** ------------------------------------------------------------------------------------------------------------- *
942 * @brief setInputScalarAt
943 ** ------------------------------------------------------------------------------------------------------------- */
944void Kernel::setInputScalarAt(const unsigned i, Scalar * const value) {
945    mInputScalars[i].setRelationship(value);
946}
947
948/** ------------------------------------------------------------------------------------------------------------- *
949 * @brief setOutputScalarAt
950 ** ------------------------------------------------------------------------------------------------------------- */
951void Kernel::setOutputScalarAt(const unsigned i, Scalar * const value) {
952    mOutputScalars[i].setRelationship(value);
953}
954
955/** ------------------------------------------------------------------------------------------------------------- *
956 * @brief getPopCountRateItemCount
957 ** ------------------------------------------------------------------------------------------------------------- */
958Value * Kernel::getPopCountRateItemCount(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate, Value * const strideIndex) {
959    assert (rate.isPopCount() || rate.isNegatedPopCount());
960    Port refPort;
961    unsigned refIndex = 0;
962    std::tie(refPort, refIndex) = getStreamPort(rate.getReference());
963    assert (refPort == Port::Input);
964    Value * array = nullptr;
965    if (rate.isNegatedPopCount()) {
966        array = mNegatedPopCountRateArray[refIndex];
967    } else {
968        array = mPopCountRateArray[refIndex];
969    }
970    assert (array && "missing pop count array attribute");
971    return b->CreateLoad(b->CreateGEP(array, strideIndex));
972}
973
974/** ------------------------------------------------------------------------------------------------------------- *
975 * @brief generateKernelMethod
976 ** ------------------------------------------------------------------------------------------------------------- */
977void SegmentOrientedKernel::generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
978    generateDoSegmentMethod(b);
979}
980
981/** ------------------------------------------------------------------------------------------------------------- *
982 * @brief annotateKernelNameWithDebugFlags
983 ** ------------------------------------------------------------------------------------------------------------- */
984inline std::string annotateKernelNameWithDebugFlags(std::string && name) {
985    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
986        name += "_EA";
987    }
988    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
989        name += "_MP";
990    }
991    name += "_O" + std::to_string((int)codegen::OptLevel);
992    return name;
993}
994
995/** ------------------------------------------------------------------------------------------------------------- *
996 * @brief getDefaultFamilyName
997 ** ------------------------------------------------------------------------------------------------------------- */
998std::string Kernel::getDefaultFamilyName() const {
999    std::string tmp;
1000    llvm::raw_string_ostream out(tmp);
1001    out << "F";
1002    out << getStride();
1003    AttributeSet::print(out);
1004    for (const Binding & input : mInputScalars) {
1005        out << ",IV("; input.print(this, out); out << ')';
1006    }
1007    for (const Binding & input : mInputStreamSets) {
1008        out << ",IS("; input.print(this, out); out << ')';
1009    }
1010    for (const Binding & output : mOutputStreamSets) {
1011        out << ",OS("; output.print(this, out); out << ')';
1012    }
1013    for (const Binding & output : mOutputScalars) {
1014        out << ",OV("; output.print(this, out); out << ')';
1015    }
1016    out.flush();
1017    return tmp;
1018}
1019
1020// CONSTRUCTOR
1021Kernel::Kernel(std::string && kernelName,
1022               Bindings && stream_inputs,
1023               Bindings && stream_outputs,
1024               Bindings && scalar_inputs,
1025               Bindings && scalar_outputs,
1026               Bindings && internal_scalars)
1027: mIsGenerated(false)
1028, mHandle(nullptr)
1029, mModule(nullptr)
1030, mKernelStateType(nullptr)
1031, mInputStreamSets(std::move(stream_inputs))
1032, mOutputStreamSets(std::move(stream_outputs))
1033, mInputScalars(std::move(scalar_inputs))
1034, mOutputScalars(std::move(scalar_outputs))
1035, mInternalScalars( std::move(internal_scalars))
1036, mCurrentMethod(nullptr)
1037, mStride(0)
1038, mIsFinal(nullptr)
1039, mNumOfStrides(nullptr)
1040, mKernelName(annotateKernelNameWithDebugFlags(std::move(kernelName))) {
1041
1042}
1043
1044Kernel::~Kernel() { }
1045
1046// CONSTRUCTOR
1047SegmentOrientedKernel::SegmentOrientedKernel(std::string && kernelName,
1048                                             Bindings && stream_inputs,
1049                                             Bindings && stream_outputs,
1050                                             Bindings && scalar_parameters,
1051                                             Bindings && scalar_outputs,
1052                                             Bindings && internal_scalars)
1053: Kernel(std::move(kernelName),
1054         std::move(stream_inputs), std::move(stream_outputs),
1055         std::move(scalar_parameters), std::move(scalar_outputs),
1056         std::move(internal_scalars))  {
1057
1058}
1059
1060
1061}
Note: See TracBrowser for help on using the repository browser.