source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 6184

Last change on this file since 6184 was 6184, checked in by nmedfort, 5 months ago

Initial version of PipelineKernel + revised StreamSet model.

File size: 47.9 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain/toolchain.h>
8#include <toolchain/driver.h>
9#include <kernels/relationship.h>
10#include <kernels/streamset.h>
11#include <kernels/kernel_builder.h>
12#include <llvm/IR/CallingConv.h>
13#include <llvm/IR/DerivedTypes.h>
14#include <llvm/IR/Constants.h>
15#include <llvm/IR/Function.h>
16#include <llvm/IR/Instructions.h>
17#include <llvm/IR/MDBuilder.h>
18#include <llvm/IR/Module.h>
19#include <llvm/Support/raw_ostream.h>
20#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(4, 0, 0)
21#include <llvm/Bitcode/ReaderWriter.h>
22#else
23#include <llvm/Bitcode/BitcodeWriter.h>
24#endif
25#include <llvm/Transforms/Utils/Local.h>
26#include <llvm/Support/Debug.h>
27#include <boost/uuid/sha1.hpp>
28#include <llvm/Support/Format.h>
29#include <sstream>
30
31
32using namespace llvm;
33using namespace boost;
34using boost::container::flat_set;
35
36namespace kernel {
37
38using AttrId = Attribute::KindId;
39using RateValue = ProcessingRate::RateValue;
40using RateId = ProcessingRate::KindId;
41using StreamPort = Kernel::StreamSetPort;
42using Port = Kernel::Port;
43
44// TODO: make "namespaced" internal scalars that are automatically grouped into cache-aligned structs
45// within the kernel state to hide the complexity from the user?
46
// Suffixes appended to a kernel's name to form the names of its generated LLVM functions.
static const char * const INIT_SUFFIX = "_Init";
static const char * const DO_SEGMENT_SUFFIX = "_DoSegment";
static const char * const TERMINATE_SUFFIX = "_Terminate";
50
51/** ------------------------------------------------------------------------------------------------------------- *
52 * @brief setInstance
53 ** ------------------------------------------------------------------------------------------------------------- */
54void  Kernel::setHandle(const std::unique_ptr<KernelBuilder> & b, Value * const handle) {
55    assert ("handle cannot be null!" && handle);
56    assert ("handle must be a pointer!" && handle->getType()->isPointerTy());
57    assert ("handle must be a kernel state object!" && (handle->getType()->getPointerElementType() == mKernelStateType));
58    #ifndef NDEBUG
59    const Function * const handleFunction = isa<Argument>(handle) ? cast<Argument>(handle)->getParent() : cast<Instruction>(handle)->getParent()->getParent();
60    const Function * const builderFunction = b->GetInsertBlock()->getParent();
61    assert ("handle is not from the current function." && (handleFunction == builderFunction));
62    #endif
63    mHandle = handle;
64}
65
66/** ------------------------------------------------------------------------------------------------------------- *
67 * @brief isLocalBuffer
68 ** ------------------------------------------------------------------------------------------------------------- */
69inline bool isLocalBuffer(const Binding & output) {
70    return output.getRate().isUnknown() || output.hasAttribute(AttrId::ManagedBuffer);
71}
72
73/** ------------------------------------------------------------------------------------------------------------- *
74 * @brief addBaseKernelProperties
75 *
76 * Base kernel properties are those that the pipeline requires access to and must be in a fixed memory location.
77 ** ------------------------------------------------------------------------------------------------------------- */
78void Kernel::addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & b) {
79
80    // Set the default kernel stride.
81    if (mStride == 0) {
82        mStride = b->getBitBlockWidth();
83    }
84
85    // TODO: if a stream has an Expandable or ManagedBuffer attribute or is produced at an Unknown rate,
86    // the pipeline ought to pass the stream as a DynamicBuffer. This will require some coordination between
87    // the pipeline and kernel to ensure both have a consistent view of the buffer and that if either expands,
88    // any other kernel that is (simultaneously) reading from the buffer is unaffected.
89
90    mStreamSetInputBuffers.clear();
91    const auto numOfInputStreams = mInputStreamSets.size();
92    mStreamSetInputBuffers.reserve(numOfInputStreams);
93    for (unsigned i = 0; i < numOfInputStreams; ++i) {
94        const Binding & input = mInputStreamSets[i];
95        mStreamSetInputBuffers.emplace_back(new ExternalBuffer(b, input.getType()));
96    }
97
98    mStreamSetOutputBuffers.clear();
99    const auto numOfOutputStreams = mOutputStreamSets.size();
100    mStreamSetOutputBuffers.reserve(numOfOutputStreams);
101    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
102        const Binding & output = mOutputStreamSets[i];
103        mStreamSetOutputBuffers.emplace_back(new ExternalBuffer(b, output.getType()));
104    }
105
106    IntegerType * const sizeTy = b->getSizeTy();
107    PointerType * const sizePtrPtrTy = sizeTy->getPointerTo()->getPointerTo();
108
109    addInternalScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
110    addInternalScalar(sizeTy, TERMINATION_SIGNAL);
111
112    // TODO: if we had a way of easily calculating the number of processed/produced items of the
113    // final stride of a non-deferred fixed rate stream, we could avoid storing the item counts.
114    for (unsigned i = 0; i < numOfInputStreams; ++i) {
115        const Binding & input = mInputStreamSets[i];
116        addInternalScalar(sizeTy, input.getName() + PROCESSED_ITEM_COUNT_SUFFIX);
117        if (LLVM_UNLIKELY(input.isDeferred())) {
118            addInternalScalar(sizeTy, input.getName() + NON_DEFERRED_ITEM_COUNT_SUFFIX);
119        }
120    }
121
122    // If an output is a managed buffer, we need to store both the buffer and a set of consumers.
123    Type * const consumerSetTy = StructType::get(b->getContext(), {sizeTy, sizePtrPtrTy})->getPointerTo();
124    for (unsigned i = 0; i < numOfOutputStreams; ++i) {
125        const Binding & output = mOutputStreamSets[i];
126        addInternalScalar(sizeTy, output.getName() + PRODUCED_ITEM_COUNT_SUFFIX);
127        if (LLVM_UNLIKELY(output.isDeferred())) {
128            addInternalScalar(sizeTy, output.getName() + NON_DEFERRED_ITEM_COUNT_SUFFIX);
129        }
130        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
131            Type * const handleTy = mStreamSetOutputBuffers[i]->getHandleType(b);
132            addInternalScalar(handleTy, output.getName() + BUFFER_HANDLE_SUFFIX);
133            addInternalScalar(consumerSetTy, output.getName() + CONSUMER_SUFFIX);
134            addInternalScalar(sizeTy, output.getName() + CONSUMED_ITEM_COUNT_SUFFIX);
135        }
136    }
137
138    // We compile in a 64-bit CPU cycle counter into every kernel.   It will remain unused
139    // in normal execution, but when codegen::EnableCycleCounter is specified, pipelines
140    // will be able to add instrumentation to cached modules without recompilation.
141    addInternalScalar(b->getInt64Ty(), CYCLECOUNT_SCALAR);
142
143}
144
145/** ------------------------------------------------------------------------------------------------------------- *
146 * @brief addScalarToMap
147 ** ------------------------------------------------------------------------------------------------------------- */
148void Kernel::addScalarToMap(const std::string & name, const ScalarType scalarType, const unsigned index) {
149    const auto r = mScalarMap.emplace(name, ScalarField{scalarType, index});
150    if (LLVM_UNLIKELY(!r.second)) {
151        const ScalarField & sf = r.first->second;
152        if (LLVM_UNLIKELY(sf.type != scalarType || sf.index != index)) {
153            report_fatal_error(getName() + " already contains scalar " + name);
154        }
155    }
156}
157
158/** ------------------------------------------------------------------------------------------------------------- *
159 * @brief addScalarToMap
160 ** ------------------------------------------------------------------------------------------------------------- */
161void Kernel::addStreamToMap(const std::string & name, const Port port, const unsigned index) {
162    const auto r = mStreamSetMap.emplace(name, std::make_pair(port, index));
163    if (LLVM_UNLIKELY(!r.second)) {
164        const StreamPort & sf = r.first->second;
165        if (LLVM_UNLIKELY(sf.first != port || sf.second != index)) {
166            report_fatal_error(getName() + " already contains stream " + name);
167        }
168    }
169}
170
171/** ------------------------------------------------------------------------------------------------------------- *
172 * @brief addKernelDeclarations
173 ** ------------------------------------------------------------------------------------------------------------- */
174void Kernel::addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b) {
175    if (mKernelStateType == nullptr) {
176        throw std::runtime_error("Kernel state definition " + getName() + " has not been finalized.");
177    }
178    addInitializeDeclaration(b);
179    addDoSegmentDeclaration(b);
180    addFinalizeDeclaration(b);
181    linkExternalMethods(b);
182}
183
184/** ------------------------------------------------------------------------------------------------------------- *
185 * @brief addInitializeDeclaration
186 ** ------------------------------------------------------------------------------------------------------------- */
187void Kernel::addInitializeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
188
189    std::vector<Type *> params;
190    params.push_back(mKernelStateType->getPointerTo());
191    for (const Binding & binding : mInputScalars) {
192        params.push_back(binding.getType());
193    }
194
195    FunctionType * const initType = FunctionType::get(b->getVoidTy(), params, false);
196    Function * const initFunc = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, b->getModule());
197    initFunc->setCallingConv(CallingConv::C);
198    initFunc->setDoesNotThrow();
199    auto args = initFunc->arg_begin();
200    args->setName("self");
201    for (const Binding & binding : mInputScalars) {
202        (++args)->setName(binding.getName());
203    }
204
205    assert (std::next(args) == initFunc->arg_end());
206}
207
208/** ------------------------------------------------------------------------------------------------------------- *
209 * @brief callGenerateInitializeMethod
210 ** ------------------------------------------------------------------------------------------------------------- */
211void Kernel::callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b) {
212    const Kernel * const storedKernel = b->getKernel();
213    b->setKernel(this);
214    Value * const storedHandle = getHandle();
215    mCurrentMethod = getInitFunction(b->getModule());
216    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
217    auto args = mCurrentMethod->arg_begin();
218    setHandle(b, &*args);
219    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
220        b->CreateMProtect(mHandle, CBuilder::Protect::WRITE);
221    }
222    b->CreateStore(ConstantAggregateZero::get(mKernelStateType), getHandle());
223    for (const auto & binding : mInputScalars) {
224        b->setScalarField(binding.getName(), &*(++args));
225    }
226
227    const auto numOfOutputs = mOutputStreamSets.size();
228    for (unsigned i = 0; i < numOfOutputs; i++) {
229        const Binding & output = mOutputStreamSets[i];
230        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
231            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
232            mStreamSetOutputBuffers[i]->setHandle(b, handle);
233        }
234    }
235    generateInitializeMethod(b);
236    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
237        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
238    }
239    b->CreateRetVoid();
240    b->setKernel(storedKernel);
241    mHandle = storedHandle;
242    mCurrentMethod = nullptr;
243}
244
245/** ------------------------------------------------------------------------------------------------------------- *
246 * @brief addDoSegmentDeclaration
247 ** ------------------------------------------------------------------------------------------------------------- */
248void Kernel::addDoSegmentDeclaration(const std::unique_ptr<KernelBuilder> & b) {
249
250    IntegerType * const sizeTy = b->getSizeTy();
251    PointerType * const sizePtrTy = sizeTy->getPointerTo();
252    Type * const voidTy = b->getVoidTy();
253
254    std::vector<Type *> params;
255    params.reserve(2 + mInputStreamSets.size() + mOutputStreamSets.size());
256    params.push_back(mKernelStateType->getPointerTo());  // self
257    params.push_back(sizeTy); // numOfStrides
258    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
259        Type * const bufferType = mStreamSetInputBuffers[i]->getType();
260        params.push_back(bufferType->getPointerTo()); // logical "base" input address
261        params.push_back(sizeTy);  // accessible input items (after non-deferred processed item count)
262        const Binding & input = mInputStreamSets[i];
263        unsigned numOfPopCountArrays = 0;
264        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
265            ++numOfPopCountArrays;
266        }
267        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
268            ++numOfPopCountArrays;
269        }
270        if (numOfPopCountArrays) {
271            params.insert(params.end(), numOfPopCountArrays, sizePtrTy); // popCountRef array (length is numOfStrides)
272        }
273    }
274    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
275        const Binding & output = mOutputStreamSets[i];
276        if (LLVM_LIKELY(!isLocalBuffer(output))) {
277            Type * const bufferType = mStreamSetOutputBuffers[i]->getType();
278            params.push_back(bufferType->getPointerTo()); // logical "base" output address
279            params.push_back(sizeTy); // writable output items (after non-deferred produced item count)
280        }
281    }
282
283    FunctionType * const doSegmentType = FunctionType::get(voidTy, params, false);
284    Function * const doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, b->getModule());
285    doSegment->setCallingConv(CallingConv::C);
286    doSegment->setDoesNotThrow();
287    auto args = doSegment->arg_begin();
288    args->setName("self");
289    (++args)->setName("numOfStrides");
290    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
291        const Binding & input = mInputStreamSets[i];
292        (++args)->setName(input.getName());
293        (++args)->setName(input.getName() + "_accessible");
294        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
295            (++args)->setName(input.getName() + "_popCountArray");
296        }
297        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
298            (++args)->setName(input.getName() + "_negatedPopCountArray");
299        }
300    }
301    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
302        const Binding & output = mOutputStreamSets[i];
303        if (LLVM_LIKELY(!isLocalBuffer(output))) {
304            (++args)->setName(output.getName());
305            (++args)->setName(output.getName() + "_writable");
306        }
307    }
308    assert (std::next(args) == doSegment->arg_end());
309}
310
311/** ------------------------------------------------------------------------------------------------------------- *
312 * @brief callGenerateKernelMethod
313 ** ------------------------------------------------------------------------------------------------------------- */
314void Kernel::callGenerateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
315
316    assert (mInputStreamSets.size() == mStreamSetInputBuffers.size());
317    assert (mOutputStreamSets.size() == mStreamSetOutputBuffers.size());
318
319    const Kernel * const storedKernel = b->getKernel();
320    b->setKernel(this);
321    Value * const storedHandle = getHandle();
322    mCurrentMethod = getDoSegmentFunction(b->getModule());
323    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
324    auto args = mCurrentMethod->arg_begin();
325    setHandle(b, &*(args++));
326    mNumOfStrides = &*(args++);
327    mIsFinal = b->CreateIsNull(mNumOfStrides);
328    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
329        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
330    }
331    // NOTE: the disadvantage of passing the stream pointers as a parameter is that it becomes more difficult
332    // to access a stream set from a LLVM function call. We could create a stream-set aware function creation
333    // and call system here but that is not an ideal way of handling this.
334
335    // TODO: use a graph to depict relations between binding? It would be better to first move to a model
336    // where inputs and outputs are contained in a single parameter vector.
337
338    const auto numOfInputs = getNumOfStreamInputs();
339    mAccessibleInputItems.resize(numOfInputs, nullptr);
340    mAvailableInputItems.resize(numOfInputs, nullptr);
341    mPopCountRateArray.resize(numOfInputs, nullptr);
342    mNegatedPopCountRateArray.resize(numOfInputs, nullptr);
343    for (unsigned i = 0; i < numOfInputs; i++) {
344        const Binding & input = mInputStreamSets[i];
345        assert (args != mCurrentMethod->arg_end());
346        Value * const addr = &*(args++);
347        auto & buffer = mStreamSetInputBuffers[i];
348        Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
349        buffer->setHandle(b, localHandle);
350        buffer->setBaseAddress(b.get(), addr);
351        assert (args != mCurrentMethod->arg_end());
352        Value * const accessible = &*(args++);
353        mAccessibleInputItems[i] = accessible;
354        Value * const processed = b->getNonDeferredProcessedItemCount(input);
355        Value * capacity = b->CreateAdd(processed, accessible);
356        mAvailableInputItems[i] = capacity;
357        if (input.hasLookahead()) {
358            capacity = b->CreateAdd(capacity, b->getSize(input.getLookahead()));
359        }
360        buffer->setCapacity(b.get(), capacity);
361        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
362            assert (args != mCurrentMethod->arg_end());
363            mPopCountRateArray[i] = &*(args++);
364        }
365        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
366            assert (args != mCurrentMethod->arg_end());
367            mNegatedPopCountRateArray[i] = &*(args++);
368        }
369    }
370
371    // set all of the output buffers
372    const auto numOfOutputs = getNumOfStreamOutputs();
373    mWritableOutputItems.resize(numOfOutputs, nullptr);
374    for (unsigned i = 0; i < numOfOutputs; i++) {
375        // If an output is a managed buffer, the address is stored within the state instead
376        // of being passed in through the function call.
377        auto & buffer = mStreamSetOutputBuffers[i];
378        const Binding & output = mOutputStreamSets[i];
379        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
380            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
381            buffer->setHandle(b, handle);
382        } else {
383            assert (args != mCurrentMethod->arg_end());
384            Value * const logicalBaseAddress = &*(args++);
385            Value * const localHandle = b->CreateAlloca(buffer->getHandleType(b));
386            buffer->setHandle(b, localHandle);
387            buffer->setBaseAddress(b.get(), logicalBaseAddress);
388            assert (args != mCurrentMethod->arg_end());
389            Value * const writable = &*(args++);
390            mWritableOutputItems[i] = writable;
391            Value * const produced = b->getNonDeferredProducedItemCount(output);
392            Value * const capacity = b->CreateAdd(produced, writable);
393            buffer->setCapacity(b.get(), capacity);
394        }
395    }
396    assert (args == mCurrentMethod->arg_end());
397
398    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
399        Value * const terminated = b->getTerminationSignal();
400        b->CreateAssert(b->CreateNot(terminated), getName() + " was called after termination");
401    }
402
403    // Calculate and/or load the accessible and writable item counts. If they are unneeded,
404    // LLVM ought to recognize them as dead code and remove them.
405    generateKernelMethod(b); // must be overridden by the Kernel subtype   
406    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
407        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
408    }
409    b->CreateRetVoid();
410
411    // Clean up all of the constructed buffers.
412    b->setKernel(storedKernel);
413    mHandle = storedHandle;
414    mCurrentMethod = nullptr;
415    mIsFinal = nullptr;
416    mNumOfStrides = nullptr;
417    mAccessibleInputItems.clear();
418    mPopCountRateArray.clear();
419    mNegatedPopCountRateArray.clear();
420}
421
422/** ------------------------------------------------------------------------------------------------------------- *
423 * @brief addFinalizeDeclaration
424 ** ------------------------------------------------------------------------------------------------------------- */
425void Kernel::addFinalizeDeclaration(const std::unique_ptr<KernelBuilder> & b) {
426    Type * resultType = nullptr;
427    if (mOutputScalars.empty()) {
428        resultType = b->getVoidTy();
429    } else {
430        const auto n = mOutputScalars.size();
431        Type * outputType[n];
432        for (unsigned i = 0; i < n; ++i) {
433            outputType[i] = mOutputScalars[i].getType();
434        }
435        if (n == 1) {
436            resultType = outputType[0];
437        } else {
438            resultType = StructType::get(b->getContext(), ArrayRef<Type *>(outputType, n));
439        }
440    }
441    PointerType * const selfType = mKernelStateType->getPointerTo();
442    FunctionType * const terminateType = FunctionType::get(resultType, {selfType}, false);
443    Function * const terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, b->getModule());
444    terminateFunc->setCallingConv(CallingConv::C);
445    terminateFunc->setDoesNotThrow();
446    auto args = terminateFunc->arg_begin();
447    args->setName("self");
448    assert (std::next(args) == terminateFunc->arg_end());
449}
450
451/** ------------------------------------------------------------------------------------------------------------- *
452 * @brief callGenerateFinalizeMethod
453 ** ------------------------------------------------------------------------------------------------------------- */
454void Kernel::callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b) {
455
456    const Kernel * const storedKernel = b->getKernel();
457    b->setKernel(this);
458    mCurrentMethod = getTerminateFunction(b->getModule());
459    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
460    auto args = mCurrentMethod->arg_begin();
461    setHandle(b, &*(args++));
462    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
463        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
464    }
465    const auto numOfOutputs = mOutputStreamSets.size();
466    for (unsigned i = 0; i < numOfOutputs; i++) {
467        const Binding & output = mOutputStreamSets[i];
468        if (LLVM_UNLIKELY(isLocalBuffer(output))) {
469            Value * const handle = b->getScalarFieldPtr(output.getName() + BUFFER_HANDLE_SUFFIX);
470            mStreamSetOutputBuffers[i]->setHandle(b, handle);
471        }
472    }
473
474    generateFinalizeMethod(b); // may be overridden by the Kernel subtype
475    const auto outputs = getFinalOutputScalars(b);
476    b->CreateFree(mHandle);
477    mHandle = nullptr;
478
479    if (outputs.empty()) {
480        b->CreateRetVoid();
481    } else {
482        const auto n = outputs.size();
483        if (n == 1) {
484            b->CreateRet(outputs[0]);
485        } else {
486            b->CreateAggregateRet(outputs.data(), n);
487        }
488    }
489
490    b->setKernel(storedKernel);
491    mCurrentMethod = nullptr;
492}
493
494/** ------------------------------------------------------------------------------------------------------------- *
495 * @brief callGenerateFinalizeMethod
496 ** ------------------------------------------------------------------------------------------------------------- */
497std::vector<Value *> Kernel::getFinalOutputScalars(const std::unique_ptr<KernelBuilder> & b) {
498    const auto n = mOutputScalars.size();
499    std::vector<Value *> outputs(n);
500    for (unsigned i = 0; i < n; ++i) {
501        outputs[i] = b->getScalarField(mOutputScalars[i].getName());
502    }
503    return outputs;
504}
505
506/** ------------------------------------------------------------------------------------------------------------- *
507 * @brief getCacheName
508 ** ------------------------------------------------------------------------------------------------------------- */
509std::string Kernel::getCacheName(const std::unique_ptr<KernelBuilder> & b) const {
510    std::stringstream cacheName;
511    cacheName << getName() << '_' << b->getBuilderUniqueName();
512    return cacheName.str();
513}
514
515/** ------------------------------------------------------------------------------------------------------------- *
516 * @brief setModule
517 ** ------------------------------------------------------------------------------------------------------------- */
518Module * Kernel::setModule(Module * const module) {
519    assert (mModule == nullptr || mModule == module);
520    assert (module != nullptr);
521    mModule = module;
522    return mModule;
523}
524
525/** ------------------------------------------------------------------------------------------------------------- *
526 * @brief makeModule
527 ** ------------------------------------------------------------------------------------------------------------- */
528Module * Kernel::makeModule(const std::unique_ptr<KernelBuilder> & b) {
529    Module * m = new Module(getCacheName(b), b->getContext());
530    m->setTargetTriple(b->getModule()->getTargetTriple());
531    m->setDataLayout(b->getModule()->getDataLayout());
532    return setModule(m);
533}
534
535
536/** ------------------------------------------------------------------------------------------------------------- *
537 * @brief getInitFunction
538 ** ------------------------------------------------------------------------------------------------------------- */
539Function * Kernel::getInitFunction(Module * const module) const {
540    const auto name = getName() + INIT_SUFFIX;
541    Function * f = module->getFunction(name);
542    if (LLVM_UNLIKELY(f == nullptr)) {
543        report_fatal_error("Cannot find " + name);
544    }
545    return f;
546}
547
548/** ------------------------------------------------------------------------------------------------------------- *
549 * @brief getDoSegmentFunction
550 ** ------------------------------------------------------------------------------------------------------------- */
551Function * Kernel::getDoSegmentFunction(Module * const module) const {
552    const auto name = getName() + DO_SEGMENT_SUFFIX;
553    Function * f = module->getFunction(name);
554    if (LLVM_UNLIKELY(f == nullptr)) {
555        report_fatal_error("Cannot find " + name);
556    }
557    return f;
558}
559
560/** ------------------------------------------------------------------------------------------------------------- *
561 * @brief getTerminateFunction
562 ** ------------------------------------------------------------------------------------------------------------- */
563Function * Kernel::getTerminateFunction(Module * const module) const {
564    const auto name = getName() + TERMINATE_SUFFIX;
565    Function * f = module->getFunction(name);
566    if (LLVM_UNLIKELY(f == nullptr)) {
567        report_fatal_error("Cannot find " + name);
568    }
569    return f;
570}
571
572/** ------------------------------------------------------------------------------------------------------------- *
573 * @brief prepareKernel
574 ** ------------------------------------------------------------------------------------------------------------- */
575void Kernel::prepareKernel(const std::unique_ptr<KernelBuilder> & b) {
576    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
577        report_fatal_error(getName() + ": cannot prepare kernel after kernel state finalized");
578    }
579    addBaseKernelProperties(b);
580    addInternalKernelProperties(b);
581    // NOTE: StructType::create always creates a new type even if an identical one exists.
582    if (LLVM_UNLIKELY(mModule == nullptr)) {
583        makeModule(b);
584    }
585    mKernelStateType = mModule->getTypeByName(getName());
586
587
588    if (LLVM_LIKELY(mKernelStateType == nullptr)) {
589        std::vector<llvm::Type *> fields;
590        fields.reserve(mInputScalars.size() + mOutputScalars.size() + mInternalScalars.size());
591        for (const Binding & scalar : mInputScalars) {
592            assert (scalar.getType());
593            fields.push_back(scalar.getType());
594        }
595        for (const Binding & scalar : mOutputScalars) {
596            assert (scalar.getType());
597            fields.push_back(scalar.getType());
598        }
599        for (const Binding & scalar : mInternalScalars) {
600            assert (scalar.getType());
601            fields.push_back(scalar.getType());
602        }
603        mKernelStateType = StructType::create(b->getContext(), fields, getName());       
604    }
605
606
607
608
609    assert (isa<StructType>(mKernelStateType));
610}
611
612/** ------------------------------------------------------------------------------------------------------------- *
613 * @brief addInternalScalar
614 ** ------------------------------------------------------------------------------------------------------------- */
615void Kernel::addInternalScalar(llvm::Type * type, const std::string & name) {
616    const auto index = mInternalScalars.size();
617    mInternalScalars.emplace_back(type, name);
618    addScalarToMap(name, ScalarType::Internal, index);
619}
620
621/** ------------------------------------------------------------------------------------------------------------- *
622 * @brief getScalarIndex
623 ** ------------------------------------------------------------------------------------------------------------- */
624unsigned Kernel::getScalarIndex(const std::string & fieldName) const {
625    const auto & field = getScalarField(fieldName);
626    assert (mKernelStateType);
627    unsigned index = field.index;
628    switch (field.type) {
629        case ScalarType::Internal:
630            index += mOutputScalars.size();
631        case ScalarType::Output:
632            index += mInputScalars.size();
633        case ScalarType::Input:
634            break;
635    }
636    assert (index < mKernelStateType->getStructNumElements());
637    return index;
638}
639
640/** ------------------------------------------------------------------------------------------------------------- *
641 * @brief prepareCachedKernel
642 ** ------------------------------------------------------------------------------------------------------------- */
643void Kernel::prepareCachedKernel(const std::unique_ptr<KernelBuilder> & b) {
644    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
645        report_fatal_error(getName() + ": cannot prepare kernel after kernel state finalized");
646    } 
647    addBaseKernelProperties(b);
648    mKernelStateType = getModule()->getTypeByName(getName());
649    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
650        report_fatal_error("Kernel definition for " + getName() + " was not found in the cache!");
651    }
652    assert (isa<StructType>(mKernelStateType));
653}
654
655/** ------------------------------------------------------------------------------------------------------------- *
656 * @brief makeSignature
657 *
658 * Default kernel signature: generate the IR and emit as byte code.
659 ** ------------------------------------------------------------------------------------------------------------- */
660std::string Kernel::makeSignature(const std::unique_ptr<KernelBuilder> & b) {
661    if (LLVM_UNLIKELY(hasSignature())) {
662        generateKernel(b);
663        std::string tmp;
664        raw_string_ostream signature(tmp);
665        WriteBitcodeToFile(getModule(), signature);
666        return signature.str();
667    } else {
668        return getModule()->getModuleIdentifier();
669    }
670}
671
672/** ------------------------------------------------------------------------------------------------------------- *
673 * @brief getStringHash
674 *
675 * Create a fixed length string hash of the given str
676 ** ------------------------------------------------------------------------------------------------------------- */
677std::string Kernel::getStringHash(const std::string & str) {
678
679    uint32_t digest[5]; // 160 bits in total
680    boost::uuids::detail::sha1 sha1;
681    sha1.process_bytes(str.c_str(), str.size());
682    sha1.get_digest(digest);
683
684    std::string buffer;
685    buffer.reserve((5 * 8) + 1);
686    raw_string_ostream out(buffer);
687    for (unsigned i = 0; i < 5; ++i) {
688        out << format_hex_no_prefix(digest[i], 8);
689    }
690    out.flush();
691
692    return buffer;
693}
694
695/** ------------------------------------------------------------------------------------------------------------- *
696 * @brief createInstance
697 ** ------------------------------------------------------------------------------------------------------------- */
698Value * Kernel::createInstance(const std::unique_ptr<KernelBuilder> & b) {
699    assert (mKernelStateType && "cannot create instance before calling prepareKernel() or prepareCachedKernel()");
700    Constant * const size = ConstantExpr::getSizeOf(mKernelStateType);
701    Value * handle = nullptr;
702    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
703        handle = b->CreateAlignedMalloc(size, b->getPageSize());
704        b->CreateMProtect(handle, size, CBuilder::Protect::READ);
705    } else {
706        handle = b->CreateAlignedMalloc(size, b->getCacheAlignment());
707    }
708//    mHandle = b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
709//    return mHandle;
710    return b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
711}
712
713/** ------------------------------------------------------------------------------------------------------------- *
714 * @brief initializeInstance
715 ** ------------------------------------------------------------------------------------------------------------- */
716void Kernel::initializeInstance(const std::unique_ptr<KernelBuilder> & b, std::vector<Value *> &args) {
717    assert (args.size() == getNumOfScalarInputs() + 1);
718    assert (args[0] && "cannot initialize before creation");
719    assert (args[0]->getType()->getPointerElementType() == mKernelStateType);
720    b->setKernel(this);
721    Function * const init = getInitFunction(b->getModule());
722    b->CreateCall(init, args);
723}
724
725/** ------------------------------------------------------------------------------------------------------------- *
726 * @brief generateKernel
727 ** ------------------------------------------------------------------------------------------------------------- */
728void Kernel::generateKernel(const std::unique_ptr<KernelBuilder> & b) {
729    if (LLVM_UNLIKELY(mIsGenerated)) return;
730    b->setKernel(this);
731    b->setModule(mModule);
732    addKernelDeclarations(b);
733    callGenerateInitializeMethod(b);
734    callGenerateKernelMethod(b);
735    callGenerateFinalizeMethod(b);
736    addAdditionalFunctions(b);
737    mIsGenerated = true;
738}
739
740/** ------------------------------------------------------------------------------------------------------------- *
741 * @brief finalizeInstance
742 ** ------------------------------------------------------------------------------------------------------------- */
743Value * Kernel::finalizeInstance(const std::unique_ptr<KernelBuilder> & b) {
744    assert (mHandle && "was not set");
745    Value * result = b->CreateCall(getTerminateFunction(b->getModule()), { mHandle });
746    mHandle = nullptr;
747    if (mOutputScalars.empty()) {
748        assert (!result || result->getType()->isVoidTy());
749        result = nullptr;
750    }
751    return result;
752
753}
754
755/** ------------------------------------------------------------------------------------------------------------- *
756 * @brief getScalarField
757 ** ------------------------------------------------------------------------------------------------------------- */
758const Kernel::ScalarField & Kernel::getScalarField(const std::string & name) const {
759    assert (!mScalarMap.empty());
760    const auto f = mScalarMap.find(name);
761    if (LLVM_UNLIKELY(f == mScalarMap.end())) {
762        report_fatal_error(getName() + " does not contain scalar: " + name);
763    }
764    return f->second;
765}
766
767/** ------------------------------------------------------------------------------------------------------------- *
768 * @brief getInputScalarBinding
769 ** ------------------------------------------------------------------------------------------------------------- */
770Binding & Kernel::getInputScalarBinding(const std::string & name) {
771    const ScalarField & field = getScalarField(name);
772    if (LLVM_UNLIKELY(field.type != ScalarType::Input)) {
773        report_fatal_error(getName() + "." + name + "is not an input scalar");
774    }
775    return mInputScalars[field.index];
776}
777
778/** ------------------------------------------------------------------------------------------------------------- *
779 * @brief getOutputScalarBinding
780 ** ------------------------------------------------------------------------------------------------------------- */
781Binding & Kernel::getOutputScalarBinding(const std::string & name) {
782    const ScalarField & field = getScalarField(name);
783    if (LLVM_UNLIKELY(field.type != ScalarType::Output)) {
784        report_fatal_error(getName() + "." + name + "is not an output scalar");
785    }
786    return mOutputScalars[field.index];
787}
788
789/** ------------------------------------------------------------------------------------------------------------- *
790 * @brief getStreamPort
791 ** ------------------------------------------------------------------------------------------------------------- */
792Kernel::StreamSetPort Kernel::getStreamPort(const std::string & name) const {
793    const auto f = mStreamSetMap.find(name);
794    if (LLVM_UNLIKELY(f == mStreamSetMap.end())) {
795        assert (!mStreamSetMap.empty());
796        report_fatal_error(getName() + " does not contain stream set " + name);
797    }
798    return f->second;
799}
800
801/** ------------------------------------------------------------------------------------------------------------- *
802 * @brief getBinding
803 ** ------------------------------------------------------------------------------------------------------------- */
804const Binding & Kernel::getStreamBinding(const std::string & name) const {
805    Port port; unsigned index;
806    std::tie(port, index) = getStreamPort(name);
807    return (port == Port::Input) ? getInputStreamSetBinding(index) : getOutputStreamSetBinding(index);
808}
809
810/** ------------------------------------------------------------------------------------------------------------- *
811 * @brief getLowerBound
812 ** ------------------------------------------------------------------------------------------------------------- */
813RateValue Kernel::getLowerBound(const Binding & binding) const {
814    const ProcessingRate & rate = binding.getRate();
815    if (rate.hasReference()) {
816        return rate.getLowerBound() * getLowerBound(getStreamBinding(rate.getReference()));
817    } else {
818        return rate.getLowerBound();
819    }
820}
821
822/** ------------------------------------------------------------------------------------------------------------- *
823 * @brief getUpperBound
824 ** ------------------------------------------------------------------------------------------------------------- */
825RateValue Kernel::getUpperBound(const Binding & binding) const {
826    const ProcessingRate & rate = binding.getRate();
827    if (rate.hasReference()) {
828        return rate.getUpperBound() * getUpperBound(getStreamBinding(rate.getReference()));
829    } else {
830        return rate.getUpperBound();
831    }
832}
833
834/** ------------------------------------------------------------------------------------------------------------- *
835 * @brief isCountable
836 ** ------------------------------------------------------------------------------------------------------------- */
837bool Kernel::isCountable(const Binding & binding) const {
838    const ProcessingRate & rate = binding.getRate();
839    if (rate.isFixed() || rate.isPopCount() || rate.isNegatedPopCount()) {
840        return true;
841//    } else if (rate.isRelative()) {
842//        return isCountable(getStreamBinding(rate.getReference()));
843    } else {
844        return false;
845    }
846}
847
848/** ------------------------------------------------------------------------------------------------------------- *
849 * @brief isCalculable
850 ** ------------------------------------------------------------------------------------------------------------- */
851bool Kernel::isCalculable(const Binding & binding) const {
852    const ProcessingRate & rate = binding.getRate();
853    if (rate.isFixed() || rate.isBounded()) {
854        return true;
855    } else if (rate.isRelative()) {
856        return isCalculable(getStreamBinding(rate.getReference()));
857    } else {
858        return false;
859    }
860}
861
862/** ------------------------------------------------------------------------------------------------------------- *
863 * @brief requiresOverflow
864 ** ------------------------------------------------------------------------------------------------------------- */
865bool Kernel::requiresOverflow(const Binding & binding) const {
866    const ProcessingRate & rate = binding.getRate();
867    if (rate.isFixed() || binding.hasAttribute(AttrId::BlockSize)) {
868        return false;
869    } else if (rate.isRelative()) {
870        return requiresOverflow(getStreamBinding(rate.getReference()));
871    } else {
872        return true;
873    }
874}
875
876/** ------------------------------------------------------------------------------------------------------------- *
877 * @brief isUnknownRate
878 ** ------------------------------------------------------------------------------------------------------------- */
879bool Kernel::isUnknownRate(const Binding & binding) const {
880    const ProcessingRate & rate = binding.getRate();
881    if (rate.isUnknown()) {
882        return true;
883    } else if (rate.isRelative()) {
884        return isUnknownRate(getStreamBinding(rate.getReference()));
885    } else {
886        return false;
887    }
888}
889
890/** ------------------------------------------------------------------------------------------------------------- *
891 * @brief initializeBindings
892 ** ------------------------------------------------------------------------------------------------------------- */
893void Kernel::initializeBindings(BaseDriver & driver) {
894
895    for (unsigned i = 0; i < mInputScalars.size(); i++) {
896        Binding & input = mInputScalars[i];
897        addScalarToMap(input.getName(), ScalarType::Input, i);
898        if (input.getRelationship() == nullptr) {
899            input.setRelationship(driver.CreateScalar(input.getType()));
900        }
901    }
902    for (unsigned i = 0; i < mInputStreamSets.size(); i++) {
903        Binding & input = mInputStreamSets[i];
904        if (LLVM_UNLIKELY(input.getRelationship() == nullptr)) {
905            report_fatal_error(getName()+ "." + input.getName() + " must be set upon construction");
906        }
907        addStreamToMap(input.getName(), Port::Input, i);
908    }
909    for (unsigned i = 0; i < mOutputStreamSets.size(); i++) {
910        Binding & output = mOutputStreamSets[i];
911        if (LLVM_UNLIKELY(output.getRelationship() == nullptr)) {
912            report_fatal_error(getName()+ "." + output.getName() + " must be set upon construction");
913        }
914        addStreamToMap(output.getName(), Port::Output, i);
915    }
916    for (unsigned i = 0; i < mInternalScalars.size(); i++) {
917        const Binding & internal = mInternalScalars[i];
918        addScalarToMap(internal.getName(), ScalarType::Internal, i);
919    }
920    for (unsigned i = 0; i < mOutputScalars.size(); i++) {
921        Binding & output = mOutputScalars[i];
922        addScalarToMap(output.getName(), ScalarType::Output, i);
923        if (output.getRelationship() == nullptr) {
924            output.setRelationship(driver.CreateScalar(output.getType()));
925        }
926    }
927}
928
929/** ------------------------------------------------------------------------------------------------------------- *
930 * @brief setInputStreamSetAt
931 ** ------------------------------------------------------------------------------------------------------------- */
932void Kernel::setInputStreamSetAt(const unsigned i, StreamSet * const value) {
933    mInputStreamSets[i].setRelationship(value);
934}
935
936/** ------------------------------------------------------------------------------------------------------------- *
937 * @brief setOutputStreamSetAt
938 ** ------------------------------------------------------------------------------------------------------------- */
939void Kernel::setOutputStreamSetAt(const unsigned i, StreamSet * const value) {
940    mOutputStreamSets[i].setRelationship(value);
941}
942
943/** ------------------------------------------------------------------------------------------------------------- *
944 * @brief setInputScalarAt
945 ** ------------------------------------------------------------------------------------------------------------- */
946void Kernel::setInputScalarAt(const unsigned i, Scalar * const value) {
947    mInputScalars[i].setRelationship(value);
948}
949
950/** ------------------------------------------------------------------------------------------------------------- *
951 * @brief setOutputScalarAt
952 ** ------------------------------------------------------------------------------------------------------------- */
953void Kernel::setOutputScalarAt(const unsigned i, Scalar * const value) {
954    mOutputScalars[i].setRelationship(value);
955}
956
957/** ------------------------------------------------------------------------------------------------------------- *
958 * @brief getPopCountRateItemCount
959 ** ------------------------------------------------------------------------------------------------------------- */
960Value * Kernel::getPopCountRateItemCount(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate, Value * const strideIndex) {
961    assert (rate.isPopCount() || rate.isNegatedPopCount());
962    Port refPort;
963    unsigned refIndex = 0;
964    std::tie(refPort, refIndex) = getStreamPort(rate.getReference());
965    assert (refPort == Port::Input);
966    Value * array = nullptr;
967    if (rate.isNegatedPopCount()) {
968        array = mNegatedPopCountRateArray[refIndex];
969    } else {
970        array = mPopCountRateArray[refIndex];
971    }
972    assert (array && "missing pop count array attribute");
973    return b->CreateLoad(b->CreateGEP(array, strideIndex));
974}
975
976/** ------------------------------------------------------------------------------------------------------------- *
977 * @brief generateKernelMethod
978 ** ------------------------------------------------------------------------------------------------------------- */
979void SegmentOrientedKernel::generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) {
980    generateDoSegmentMethod(b);
981}
982
983/** ------------------------------------------------------------------------------------------------------------- *
984 * @brief annotateKernelNameWithDebugFlags
985 ** ------------------------------------------------------------------------------------------------------------- */
986inline std::string annotateKernelNameWithDebugFlags(std::string && name) {
987    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
988        name += "_EA";
989    }
990    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
991        name += "_MP";
992    }
993    name += "_O" + std::to_string((int)codegen::OptLevel);
994    return name;
995}
996
997/** ------------------------------------------------------------------------------------------------------------- *
998 * @brief getDefaultFamilyName
999 ** ------------------------------------------------------------------------------------------------------------- */
1000std::string Kernel::getDefaultFamilyName() const {
1001    std::string tmp;
1002    llvm::raw_string_ostream out(tmp);
1003    out << "F";
1004    out << getStride();
1005    AttributeSet::print(out);
1006    for (const Binding & input : mInputScalars) {
1007        out << ",IV("; input.print(this, out); out << ')';
1008    }
1009    for (const Binding & input : mInputStreamSets) {
1010        out << ",IS("; input.print(this, out); out << ')';
1011    }
1012    for (const Binding & output : mOutputStreamSets) {
1013        out << ",OS("; output.print(this, out); out << ')';
1014    }
1015    for (const Binding & output : mOutputScalars) {
1016        out << ",OV("; output.print(this, out); out << ')';
1017    }
1018    out.flush();
1019    return tmp;
1020}
1021
1022// CONSTRUCTOR
1023Kernel::Kernel(std::string && kernelName,
1024               Bindings && stream_inputs,
1025               Bindings && stream_outputs,
1026               Bindings && scalar_inputs,
1027               Bindings && scalar_outputs,
1028               Bindings && internal_scalars)
1029: mIsGenerated(false)
1030, mHandle(nullptr)
1031, mModule(nullptr)
1032, mKernelStateType(nullptr)
1033, mInputStreamSets(std::move(stream_inputs))
1034, mOutputStreamSets(std::move(stream_outputs))
1035, mInputScalars(std::move(scalar_inputs))
1036, mOutputScalars(std::move(scalar_outputs))
1037, mInternalScalars( std::move(internal_scalars))
1038, mCurrentMethod(nullptr)
1039, mStride(0)
1040, mIsFinal(nullptr)
1041, mNumOfStrides(nullptr)
1042, mKernelName(std::move(annotateKernelNameWithDebugFlags(std::move(kernelName)))) {
1043
1044}
1045
1046Kernel::~Kernel() { }
1047
1048// CONSTRUCTOR
1049SegmentOrientedKernel::SegmentOrientedKernel(std::string && kernelName,
1050                                             Bindings && stream_inputs,
1051                                             Bindings && stream_outputs,
1052                                             Bindings && scalar_parameters,
1053                                             Bindings && scalar_outputs,
1054                                             Bindings && internal_scalars)
1055: Kernel(std::move(kernelName),
1056         std::move(stream_inputs), std::move(stream_outputs),
1057         std::move(scalar_parameters), std::move(scalar_outputs),
1058         std::move(internal_scalars))  {
1059
1060}
1061
1062
1063}
Note: See TracBrowser for help on using the repository browser.