source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5436

Last change on this file since 5436 was 5436, checked in by nmedfort, 2 years ago

Continued refactoring work. PabloKernel is now an abstract base type with a 'generatePabloMethod' hook to generate Pablo code.

File size: 43.0 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain/toolchain.h>
8#include <kernels/streamset.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Function.h>
11#include <llvm/IR/Instructions.h>
12#include <llvm/IR/MDBuilder.h>
13#include <llvm/IR/Module.h>
14#include <llvm/Support/raw_ostream.h>
15#include <llvm/Bitcode/ReaderWriter.h>
16#include <llvm/Transforms/Utils/Local.h>
17#include <kernels/streamset.h>
18#include <sstream>
19#include <kernels/kernel_builder.h>
20
21using namespace llvm;
22using namespace parabix;
23
24namespace kernel {
25
26const std::string Kernel::DO_BLOCK_SUFFIX = "_DoBlock";
27const std::string Kernel::FINAL_BLOCK_SUFFIX = "_FinalBlock";
28const std::string Kernel::LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";
29const std::string Kernel::PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";
30const std::string Kernel::CONSUMED_ITEM_COUNT_SUFFIX = "_consumedItemCount";
31const std::string Kernel::PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";
32const std::string Kernel::TERMINATION_SIGNAL = "terminationSignal";
33const std::string Kernel::BUFFER_PTR_SUFFIX = "_bufferPtr";
34const std::string Kernel::CONSUMER_SUFFIX = "_consumerLocks";
35
36unsigned Kernel::addScalar(Type * const type, const std::string & name) {
37    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
38        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
39    }
40    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
41        report_fatal_error(getName() + " already contains scalar field " + name);
42    }
43    const auto index = mKernelFields.size();
44    mKernelMap.emplace(name, index);
45    mKernelFields.push_back(type);
46    return index;
47}
48
49unsigned Kernel::addUnnamedScalar(Type * const type) {
50    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
51        report_fatal_error("Cannot add unnamed field  to " + getName() + " after kernel state finalized");
52    }
53    const auto index = mKernelFields.size();
54    mKernelFields.push_back(type);
55    return index;
56}
57
58// Get the value of a scalar field for the current instance.
59llvm::Value * Kernel::getScalarFieldPtr(llvm::Value * index) const {
60    return iBuilder->CreateGEP(getInstance(), {iBuilder->getInt32(0), index});
61}
62
63llvm::Value * Kernel::getScalarFieldPtr(const std::string & fieldName) const {
64    return getScalarFieldPtr(iBuilder->getInt32(getScalarIndex(fieldName)));
65}
66
67llvm::Value * Kernel::getScalarField(const std::string & fieldName) const {
68    return iBuilder->CreateLoad(getScalarFieldPtr(fieldName), fieldName);
69}
70
71// Set the value of a scalar field for the current instance.
72void Kernel::setScalarField(const std::string & fieldName, llvm::Value * value) const {
73    iBuilder->CreateStore(value, getScalarFieldPtr(fieldName));
74}
75
76void Kernel::prepareStreamSetNameMap() {
77    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
78        mStreamMap.emplace(mStreamSetInputs[i].name, std::make_pair(Port::Input, i));
79    }
80    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
81        mStreamMap.emplace(mStreamSetOutputs[i].name, std::make_pair(Port::Output, i));
82    }
83}
84   
85void Kernel::prepareKernel() {
86    assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
87    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
88        report_fatal_error("Cannot prepare kernel after kernel state finalized");
89    }
90    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
91        std::string tmp;
92        raw_string_ostream out(tmp);
93        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
94            << mStreamSetInputs.size() << " input stream sets.";
95        report_fatal_error(out.str());
96    }
97    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
98        std::string tmp;
99        raw_string_ostream out(tmp);
100        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
101            << mStreamSetOutputs.size() << " output stream sets.";
102        report_fatal_error(out.str());
103    }
104    const auto blockSize = iBuilder->getBitBlockWidth();
105    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
106        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
107            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
108        }
109        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
110        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
111            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
112        }       
113    }
114
115    IntegerType * const sizeTy = iBuilder->getSizeTy();
116    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
117        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
118        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
119            addScalar(sizeTy, mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
120        }
121    }
122    for (const auto binding : mScalarInputs) {
123        addScalar(binding.type, binding.name);
124    }
125    for (const auto binding : mScalarOutputs) {
126        addScalar(binding.type, binding.name);
127    }
128    if (mStreamMap.empty()) {
129        prepareStreamSetNameMap();
130    }
131    for (auto binding : mInternalScalars) {
132        addScalar(binding.type, binding.name);
133    }
134
135    Type * const consumerSetTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
136    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
137        addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_SUFFIX);
138    }
139
140    addScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
141    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
142
143    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
144        addScalar(sizeTy, mStreamSetOutputs[i].name + CONSUMED_ITEM_COUNT_SUFFIX);
145    }
146
147    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
148}
149
150void Kernel::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
151    assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
152    assert ("IDISA Builder does not have a valid Module" && iBuilder->getModule());
153    std::stringstream cacheName;   
154    cacheName << getName() << '_' << iBuilder->getBuilderUniqueName();
155    for (const StreamSetBuffer * b: inputs) {
156        cacheName <<  ':' <<  b->getUniqueID();
157    }
158    for (const StreamSetBuffer * b: outputs) {
159        cacheName <<  ':' <<  b->getUniqueID();
160    }
161    Module * const kernelModule = new Module(cacheName.str(), iBuilder->getContext());
162    kernelModule->setTargetTriple(iBuilder->getModule()->getTargetTriple());
163    createKernelStub(inputs, outputs, kernelModule);
164}
165
166void Kernel::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, Module * const kernelModule) {
167    assert (mModule == nullptr);
168    assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
169    assert (mStreamSetInputBuffers.empty());
170    assert (mStreamSetOutputBuffers.empty());
171
172    if (LLVM_UNLIKELY(mStreamSetInputs.size() != inputs.size())) {
173        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
174                           " input stream sets but was given "
175                           + std::to_string(inputs.size()));
176    }
177
178    for (unsigned i = 0; i < inputs.size(); ++i) {
179        StreamSetBuffer * const buf = inputs[i];
180        if (LLVM_UNLIKELY(buf == nullptr)) {
181            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
182                               + " cannot be null");
183        }
184        buf->addConsumer(this);
185    }
186
187    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != outputs.size())) {
188        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
189                           + " output stream sets but was given "
190                           + std::to_string(outputs.size()));
191    }
192
193    for (unsigned i = 0; i < outputs.size(); ++i) {
194        StreamSetBuffer * const buf = outputs[i];
195        if (LLVM_UNLIKELY(buf == nullptr)) {
196            report_fatal_error(getName() + ": output stream set " + std::to_string(i) + " cannot be null");
197        }
198        if (LLVM_LIKELY(buf->getProducer() == nullptr)) {
199            buf->setProducer(this);
200        } else {
201            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
202                               + " is already produced by kernel " + buf->getProducer()->getName());
203        }
204    }
205
206    mModule = kernelModule;
207
208    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
209    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
210
211    prepareKernel();
212}
213
214
215// Default kernel signature: generate the IR and emit as byte code.
216std::string Kernel::makeSignature() {
217    assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
218    if (LLVM_LIKELY(moduleIDisSignature())) {
219        return getModule()->getModuleIdentifier();
220    } else {
221        generateKernel();
222        std::string signature;
223        raw_string_ostream OS(signature);
224        WriteBitcodeToFile(getModule(), OS);
225        return signature;
226    }
227}
228
229void Kernel::generateKernel() {
230    assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
231    // If the module id cannot uniquely identify this kernel, "generateKernelSignature()" will have already
232    // generated the unoptimized IR.
233    if (!mIsGenerated) {
234        auto ip = iBuilder->saveIP();
235        auto saveInstance = getInstance();
236        addKernelDeclarations();
237        callGenerateInitializeMethod();
238        callGenerateDoSegmentMethod();       
239        callGenerateFinalizeMethod();
240        setInstance(saveInstance);
241        iBuilder->restoreIP(ip);
242        mIsGenerated = true;
243    }
244}
245
246inline void Kernel::callGenerateInitializeMethod() {
247    mCurrentMethod = getInitFunction(iBuilder->getModule());
248    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
249    Function::arg_iterator args = mCurrentMethod->arg_begin();
250    setInstance(&*(args++));
251    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), getInstance());
252    for (const auto & binding : mScalarInputs) {
253        setScalarField(binding.name, &*(args++));
254    }
255    for (const auto & binding : mStreamSetOutputs) {
256        setConsumerLock(binding.name, &*(args++));
257    }
258    generateInitializeMethod();
259    iBuilder->CreateRetVoid();
260}
261
262inline void Kernel::callGenerateDoSegmentMethod() {
263    mCurrentMethod = getDoSegmentFunction(iBuilder->getModule());
264    BasicBlock * const entry = CreateBasicBlock(getName() + "_entry");
265    iBuilder->SetInsertPoint(entry);
266    auto args = mCurrentMethod->arg_begin();
267    setInstance(&*(args++));
268    mIsFinal = &*(args++);
269    const auto n = mStreamSetInputs.size();
270    mAvailableItemCount.resize(n, nullptr);
271    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
272        mAvailableItemCount[i] = &*(args++);
273    }
274    generateDoSegmentMethod(); // must be overridden by the KernelBuilder subtype
275    mIsFinal = nullptr;
276    mAvailableItemCount.clear();
277    iBuilder->CreateRetVoid();
278}
279
280inline void Kernel::callGenerateFinalizeMethod() {
281    mCurrentMethod = getTerminateFunction(iBuilder->getModule());
282    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
283    auto args = mCurrentMethod->arg_begin();
284    setInstance(&*(args++));
285    generateFinalizeMethod(); // may be overridden by the KernelBuilder subtype
286    const auto n = mScalarOutputs.size();
287    if (n == 0) {
288        iBuilder->CreateRetVoid();
289    } else {
290        Value * outputs[n];
291        for (unsigned i = 0; i < n; ++i) {
292            outputs[i] = getScalarField(mScalarOutputs[i].name);
293        }
294        if (n == 1) {
295            iBuilder->CreateRet(outputs[0]);
296        } else {
297            iBuilder->CreateAggregateRet(outputs, n);
298        }
299    }
300}
301
302unsigned Kernel::getScalarIndex(const std::string & name) const {
303    assert ("getScalarIndex was given a null IDISA Builder" && iBuilder);
304    const auto f = mKernelMap.find(name);
305    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
306        report_fatal_error(getName() + " does not contain scalar: " + name);
307    }
308    return f->second;
309}
310
311Value * Kernel::getProducedItemCount(const std::string & name, Value * doFinal) const {
312    Port port; unsigned ssIdx;
313    std::tie(port, ssIdx) = getStreamPort(name);
314    assert (port == Port::Output);
315    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
316        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
317        std::string principalField;
318        if (refSet.empty()) {
319            if (mStreamSetInputs.empty()) {
320                principalField = mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX;
321            } else {
322                principalField = mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
323            }
324        } else {
325            Port port; unsigned pfIndex;
326            std::tie(port, pfIndex) = getStreamPort(refSet);
327            if (port == Port::Input) {
328               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
329            } else {
330               principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
331            }
332        }
333        Value * principalItemsProcessed = getScalarField(principalField);
334        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
335    }
336    return getScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX);
337}
338
339llvm::Value * Kernel::getAvailableItemCount(const std::string & name) const {
340    for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
341        if (mStreamSetInputs[i].name == name) {
342            return mAvailableItemCount[i];
343        }
344    }
345    return nullptr;
346}
347
348Value * Kernel::getProcessedItemCount(const std::string & name) const {
349    Port port; unsigned ssIdx;
350    std::tie(port, ssIdx) = getStreamPort(name);
351    assert (port == Port::Input);
352    if (mStreamSetInputs[ssIdx].rate.isExact()) {
353        std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
354        if (refSet.empty()) {
355            refSet = mStreamSetInputs[0].name;
356        }
357        Value * principalItemsProcessed = getScalarField(refSet + PROCESSED_ITEM_COUNT_SUFFIX);
358        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
359    }
360    return getScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX);
361}
362
363Value * Kernel::getConsumedItemCount(const std::string & name) const {
364    return getScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX);
365}
366
367void Kernel::setProducedItemCount(const std::string & name, Value * value) const {
368    setScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX, value);
369}
370
371void Kernel::setProcessedItemCount(const std::string & name, Value * value) const {
372    setScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX, value);
373}
374
375void Kernel::setConsumedItemCount(const std::string & name, Value * value) const {
376    setScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX, value);
377}
378
379Value * Kernel::getTerminationSignal() const {
380    return getScalarField(TERMINATION_SIGNAL);
381}
382
383void Kernel::setTerminationSignal() const {
384    setScalarField(TERMINATION_SIGNAL, iBuilder->getTrue());
385}
386
387LoadInst * Kernel::acquireLogicalSegmentNo() const {
388    assert (iBuilder);
389    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR));
390}
391
392void Kernel::releaseLogicalSegmentNo(Value * nextSegNo) const {
393    iBuilder->CreateAtomicStoreRelease(nextSegNo, getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR));
394}
395
396llvm::Value * Kernel::getLinearlyAccessibleItems(const std::string & name, llvm::Value * fromPosition) const {
397    llvm::Value * instance = getStreamSetBufferPtr(name);
398    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
399    return buf->getLinearlyAccessibleItems(iBuilder, instance, fromPosition);
400}
401
402llvm::Value * Kernel::getConsumerLock(const std::string & name) const {
403    return getScalarField(name + CONSUMER_SUFFIX);
404}
405
406void Kernel::setConsumerLock(const std::string & name, llvm::Value * value) const {
407    setScalarField(name + CONSUMER_SUFFIX, value);
408}
409
410inline Value * Kernel::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
411    for (const Binding & b : bindings) {
412        if (b.name == name) {
413            const auto divisor = iBuilder->getBitBlockWidth();
414            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
415                return iBuilder->CreateLShr(itemCount, std::log2(divisor));
416            } else {
417                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
418            }
419        }
420    }
421    report_fatal_error("Error: no binding in " + getName() + " for " + name);
422}
423
424Value * Kernel::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
425    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
426    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
427    return buf->getStreamBlockPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
428}
429
430Value * Kernel::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
431    return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
432}
433
434Value * Kernel::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
435    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
436    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
437    return buf->getStreamPackPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
438}
439
440Value * Kernel::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
441    return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
442}
443
444llvm::Value * Kernel::getInputStreamSetCount(const std::string & name) const {
445    return getInputStreamSetBuffer(name)->getStreamSetCount(iBuilder, getStreamSetBufferPtr(name));
446}
447
448llvm::Value * Kernel::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
449    Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
450    blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
451    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
452    return buf->getStreamBlockPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
453}
454
455Value * Kernel::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
456    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
457    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
458    return buf->getStreamBlockPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
459}
460
461void Kernel::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
462    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
463}
464
465Value * Kernel::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
466    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
467    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
468    return buf->getStreamPackPtr(iBuilder, getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
469}
470
471void Kernel::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
472    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
473}
474
475llvm::Value * Kernel::getOutputStreamSetCount(const std::string & name) const {
476    return getOutputStreamSetBuffer(name)->getStreamSetCount(iBuilder, getStreamSetBufferPtr(name));
477}
478
479Value * Kernel::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
480    return getInputStreamSetBuffer(name)->getRawItemPointer(iBuilder, getStreamSetBufferPtr(name), streamIndex, absolutePosition);
481}
482
483Value * Kernel::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
484    return getOutputStreamSetBuffer(name)->getRawItemPointer(iBuilder, getStreamSetBufferPtr(name), streamIndex, absolutePosition);
485}
486
487Value * Kernel::getBaseAddress(const std::string & name) const {
488    return getAnyStreamSetBuffer(name)->getBaseAddress(iBuilder, getStreamSetBufferPtr(name));
489}
490
491void Kernel::setBaseAddress(const std::string & name, Value * const addr) const {
492    return getAnyStreamSetBuffer(name)->setBaseAddress(iBuilder, getStreamSetBufferPtr(name), addr);
493}
494
495Value * Kernel::getBufferedSize(const std::string & name) const {
496    return getAnyStreamSetBuffer(name)->getBufferedSize(iBuilder, getStreamSetBufferPtr(name));
497}
498
499void Kernel::setBufferedSize(const std::string & name, Value * size) const {
500    unsigned index; Port port;
501    std::tie(port, index) = getStreamPort(name);
502    const StreamSetBuffer * buf = nullptr;
503    if (port == Port::Input) {
504        assert (index < mStreamSetInputBuffers.size());
505        buf = mStreamSetInputBuffers[index];
506    } else {
507        assert (index < mStreamSetOutputBuffers.size());
508        buf = mStreamSetOutputBuffers[index];
509    }
510    buf->setBufferedSize(iBuilder, getStreamSetBufferPtr(name), size);
511}
512
513BasicBlock * Kernel::CreateWaitForConsumers() const {
514
515    const auto consumers = getStreamOutputs();
516    BasicBlock * const entry = iBuilder->GetInsertBlock();
517    if (consumers.empty()) {
518        return entry;
519    } else {
520        Function * const parent = entry->getParent();
521        IntegerType * const sizeTy = iBuilder->getSizeTy();
522        ConstantInt * const zero = iBuilder->getInt32(0);
523        ConstantInt * const one = iBuilder->getInt32(1);
524        ConstantInt * const size0 = iBuilder->getSize(0);
525
526        Value * const segNo = acquireLogicalSegmentNo();
527        const auto n = consumers.size();
528        BasicBlock * load[n + 1];
529        BasicBlock * wait[n];
530        for (unsigned i = 0; i < n; ++i) {
531            load[i] = BasicBlock::Create(iBuilder->getContext(), consumers[i].name + "Load", parent);
532            wait[i] = BasicBlock::Create(iBuilder->getContext(), consumers[i].name + "Wait", parent);
533        }
534        load[n] = BasicBlock::Create(iBuilder->getContext(), "Resume", parent);
535        iBuilder->CreateBr(load[0]);
536        for (unsigned i = 0; i < n; ++i) {
537
538            iBuilder->SetInsertPoint(load[i]);
539            Value * const outputConsumers = getConsumerLock(consumers[i].name);
540
541            Value * const consumerCount = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, zero}));
542            Value * const consumerPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(outputConsumers, {zero, one}));
543            Value * const noConsumers = iBuilder->CreateICmpEQ(consumerCount, size0);
544            iBuilder->CreateUnlikelyCondBr(noConsumers, load[i + 1], wait[i]);
545
546            iBuilder->SetInsertPoint(wait[i]);
547            PHINode * const consumerPhi = iBuilder->CreatePHI(sizeTy, 2);
548            consumerPhi->addIncoming(size0, load[i]);
549
550            Value * const conSegPtr = iBuilder->CreateLoad(iBuilder->CreateGEP(consumerPtr, consumerPhi));
551            Value * const processedSegmentCount = iBuilder->CreateAtomicLoadAcquire(conSegPtr);
552            Value * const ready = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
553            assert (ready->getType() == iBuilder->getInt1Ty());
554            Value * const nextConsumerIdx = iBuilder->CreateAdd(consumerPhi, iBuilder->CreateZExt(ready, sizeTy));
555            consumerPhi->addIncoming(nextConsumerIdx, wait[i]);
556            Value * const next = iBuilder->CreateICmpEQ(nextConsumerIdx, consumerCount);
557            iBuilder->CreateCondBr(next, load[i + 1], wait[i]);
558        }
559
560        BasicBlock * const exit = load[n];
561        iBuilder->SetInsertPoint(exit);
562        return exit;
563    }
564
565}
566
567Value * Kernel::getStreamSetBufferPtr(const std::string & name) const {
568    return getScalarField(name + BUFFER_PTR_SUFFIX);
569}
570
571//Argument * Kernel::getParameter(Function * const f, const std::string & name) const {
572//    for (auto & arg : f->getArgumentList()) {
573//        if (arg.getName().equals(name)) {
574//            return &arg;
575//        }
576//    }
577//    report_fatal_error(getName() + " does not have parameter " + name);
578//}
579
580CallInst * Kernel::createDoSegmentCall(const std::vector<Value *> & args) const {
581    Function * const doSegment = getDoSegmentFunction(iBuilder->getModule());
582    assert (doSegment->getArgumentList().size() == args.size());
583    return iBuilder->CreateCall(doSegment, args);
584}
585
586Value * Kernel::getAccumulator(const std::string & accumName) const {
587    assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
588    if (LLVM_UNLIKELY(mOutputScalarResult == nullptr)) {
589        report_fatal_error("Cannot get accumulator " + accumName + " until " + getName() + " has terminated.");
590    }
591    const auto n = mScalarOutputs.size();
592    if (LLVM_UNLIKELY(n == 0)) {
593        report_fatal_error(getName() + " has no output scalars.");
594    } else {
595        for (unsigned i = 0; i < n; ++i) {
596            const Binding & b = mScalarOutputs[i];
597            if (b.name == accumName) {
598                if (n == 1) {
599                    return mOutputScalarResult;
600                } else {
601                    return iBuilder->CreateExtractValue(mOutputScalarResult, {i});
602                }
603            }
604        }
605        report_fatal_error(getName() + " has no output scalar named " + accumName);
606    }
607}
608
609BasicBlock * Kernel::CreateBasicBlock(std::string && name) const {
610    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
611}
612
613Value * Kernel::createInstance() {
614    assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
615    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
616        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
617    }
618    setInstance(iBuilder->CreateCacheAlignedAlloca(mKernelStateType));
619    return getInstance();
620}
621
622void Kernel::initializeInstance() {
623    assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
624    if (LLVM_UNLIKELY(getInstance() == nullptr)) {
625        report_fatal_error("Cannot initialize " + getName() + " before calling createInstance()");
626    }
627    std::vector<Value *> args;
628    args.reserve(1 + mInitialArguments.size() + mStreamSetInputBuffers.size() + (mStreamSetOutputBuffers.size() * 2));
629    args.push_back(getInstance());
630    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
631        Value * arg = mInitialArguments[i];
632        if (LLVM_UNLIKELY(arg == nullptr)) {
633            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
634                               + " cannot be null when calling createInstance()");
635        }
636        args.push_back(arg);
637    }
638    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
639        assert (mStreamSetInputBuffers[i]);
640        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
641        if (LLVM_UNLIKELY(arg == nullptr)) {
642            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
643                               + " was not allocated prior to calling createInstance()");
644        }
645        args.push_back(arg);
646    }
647    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
648    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
649        assert (mStreamSetOutputBuffers[i]);
650        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
651        if (LLVM_UNLIKELY(arg == nullptr)) {
652            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
653                               + " was not allocated prior to calling createInstance()");
654        }
655        args.push_back(arg);
656    }
657    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
658    IntegerType * const sizeTy = iBuilder->getSizeTy();
659    PointerType * const sizePtrTy = sizeTy->getPointerTo();
660    PointerType * const sizePtrPtrTy = sizePtrTy->getPointerTo();
661    StructType * const consumerTy = StructType::get(sizeTy, sizePtrPtrTy, nullptr);
662    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
663        const auto output = mStreamSetOutputBuffers[i];
664        const auto & consumers = output->getConsumers();
665        const auto n = consumers.size();
666        AllocaInst * const outputConsumers = iBuilder->CreateAlloca(consumerTy);
667        Value * const consumerSegNoArray = iBuilder->CreateAlloca(ArrayType::get(sizePtrTy, n));
668        for (unsigned i = 0; i < n; ++i) {
669            Kernel * const consumer = consumers[i];
670            assert ("all instances must be created prior to initialization of any instance" && consumer->getInstance());
671            Value * const segmentNoPtr = consumer->getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR);
672            iBuilder->CreateStore(segmentNoPtr, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
673        }
674        Value * const consumerCountPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
675        iBuilder->CreateStore(iBuilder->getSize(n), consumerCountPtr);
676        Value * const consumerSegNoArrayPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
677        iBuilder->CreateStore(iBuilder->CreatePointerCast(consumerSegNoArray, sizePtrPtrTy), consumerSegNoArrayPtr);
678        args.push_back(outputConsumers);
679    }
680
681    iBuilder->CreateCall(getInitFunction(iBuilder->getModule()), args);
682}
683
684//  The default doSegment method dispatches to the doBlock routine for
685//  each block of the given number of blocksToDo, and then updates counts.
686
687void BlockOrientedKernel::generateDoSegmentMethod() {   
688    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
689    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
690    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
691    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
692    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
693    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
694
695    Value * baseTarget = nullptr;
696    if (useIndirectBr()) {
697        baseTarget = iBuilder->CreateSelect(mIsFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
698    }
699
700    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
701    Value * availablePos = mAvailableItemCount[0];
702    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
703    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
704    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
705
706    iBuilder->CreateBr(strideLoopCond);
707
708    iBuilder->SetInsertPoint(strideLoopCond);
709
710    PHINode * branchTarget = nullptr;
711    if (useIndirectBr()) {
712        branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
713        branchTarget->addIncoming(baseTarget, entryBlock);
714    }
715
716    PHINode * const stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
717    stridesRemaining->addIncoming(stridesToDo, entryBlock);
718    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
719    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
720    Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
721    iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
722
723    iBuilder->SetInsertPoint(mStrideLoopBody);
724
725    if (useIndirectBr()) {
726        mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
727        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
728    }
729
730    /// GENERATE DO BLOCK METHOD
731
732    writeDoBlockMethod();
733
734    /// UPDATE PROCESSED COUNTS
735
736    processed = getProcessedItemCount(mStreamSetInputs[0].name);
737    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
738    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
739
740    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
741
742    BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
743    if (useIndirectBr()) {
744        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
745    }
746    iBuilder->CreateBr(strideLoopCond);
747
748    stridesDone->moveAfter(bodyEnd);
749
750    iBuilder->SetInsertPoint(stridesDone);
751
752    // Now conditionally perform the final block processing depending on the doFinal parameter.
753    if (useIndirectBr()) {
754        mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
755        mStrideLoopBranch->addDestination(doFinalBlock);
756        mStrideLoopBranch->addDestination(segmentDone);
757    } else {
758        iBuilder->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, segmentDone);
759    }
760
761    doFinalBlock->moveAfter(stridesDone);
762
763    iBuilder->SetInsertPoint(doFinalBlock);
764
765    Value * remainingItems = iBuilder->CreateSub(mAvailableItemCount[0], getProcessedItemCount(mStreamSetInputs[0].name));
766    writeFinalBlockMethod(remainingItems);
767
768    itemsDone = mAvailableItemCount[0];
769    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
770    setTerminationSignal();
771    iBuilder->CreateBr(segmentDone);
772
773    segmentDone->moveAfter(iBuilder->GetInsertBlock());
774
775    iBuilder->SetInsertPoint(segmentDone);
776
777    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
778    if (useIndirectBr()) {
779        MDBuilder mdb(iBuilder->getContext());
780        const auto destinations = mStrideLoopBranch->getNumDestinations();
781        uint32_t weights[destinations];
782        for (unsigned i = 0; i < destinations; ++i) {
783            weights[i] = (mStrideLoopBranch->getDestination(i) == segmentDone) ? 100 : 1;
784        }
785        ArrayRef<uint32_t> bw(weights, destinations);
786        mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
787    }
788
789}
790
791inline void BlockOrientedKernel::writeDoBlockMethod() {
792
793    Value * const self = getInstance();
794    Function * const cp = mCurrentMethod;
795    auto ip = iBuilder->saveIP();
796
797    /// Check if the do block method is called and create the function if necessary   
798    if (!useIndirectBr()) {
799        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType()}, false);
800        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
801        mCurrentMethod->setCallingConv(CallingConv::C);
802        mCurrentMethod->setDoesNotThrow();
803        mCurrentMethod->setDoesNotCapture(1);
804        auto args = mCurrentMethod->arg_begin();
805        args->setName("self");
806        setInstance(&*args);
807        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
808    }
809
810    std::vector<Value *> priorProduced;
811    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
812        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
813            priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
814        }
815    }
816
817    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
818
819    unsigned priorIdx = 0;
820    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
821        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
822        if (SwizzledCopybackBuffer * const cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
823            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
824            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
825            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
826            Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
827            Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
828            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
829            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(iBuilder, instance, priorBlock);
830            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
831            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
832            iBuilder->CreateCondBr(wraparound, copyBack, done);
833            iBuilder->SetInsertPoint(copyBack);
834            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
835            cb->createCopyBack(iBuilder, instance, copyItems);
836            iBuilder->CreateBr(done);
837            iBuilder->SetInsertPoint(done);
838            priorIdx++;
839        }
840        if (CircularCopybackBuffer * const cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
841            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
842            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
843            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
844            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
845            Value * accessible = cb->getLinearlyAccessibleItems(iBuilder, instance, priorProduced[priorIdx]);
846            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
847            iBuilder->CreateCondBr(wraparound, copyBack, done);
848            iBuilder->SetInsertPoint(copyBack);
849            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
850            cb->createCopyBack(iBuilder, instance, copyItems);
851            iBuilder->CreateBr(done);
852            iBuilder->SetInsertPoint(done);
853            priorIdx++;
854        }
855    }
856
857
858    /// Call the do block method if necessary then restore the current function state to the do segement method
859    if (!useIndirectBr()) {
860        iBuilder->CreateRetVoid();
861        mDoBlockMethod = mCurrentMethod;
862        iBuilder->restoreIP(ip);
863        iBuilder->CreateCall(mCurrentMethod, self);
864        setInstance(self);
865        mCurrentMethod = cp;
866    }
867
868}
869
870inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
871
872    Value * const self = getInstance();
873    Function * const cp = mCurrentMethod;
874    Value * const remainingItemCount = remainingItems;
875    auto ip = iBuilder->saveIP();
876
877    if (!useIndirectBr()) {
878        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType(), iBuilder->getSizeTy()}, false);
879        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
880        mCurrentMethod->setCallingConv(CallingConv::C);
881        mCurrentMethod->setDoesNotThrow();
882        mCurrentMethod->setDoesNotCapture(1);
883        auto args = mCurrentMethod->arg_begin();
884        args->setName("self");
885        setInstance(&*args);
886        remainingItems = &*(++args);
887        remainingItems->setName("remainingItems");
888        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
889    }
890
891    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
892
893    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
894
895    if (!useIndirectBr()) {
896        iBuilder->CreateRetVoid();       
897        iBuilder->restoreIP(ip);
898        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
899        mCurrentMethod = cp;
900        setInstance(self);
901    }
902
903}
904
905//  The default finalBlock method simply dispatches to the doBlock routine.
906void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
907    CreateDoBlockMethodCall();
908}
909
910bool BlockOrientedKernel::useIndirectBr() const {
911    return iBuilder->supportsIndirectBr();
912}
913
914void BlockOrientedKernel::CreateDoBlockMethodCall() {
915    if (useIndirectBr()) {
916        BasicBlock * bb = CreateBasicBlock("resume");
917        mStrideLoopBranch->addDestination(bb);
918        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
919        iBuilder->CreateBr(mStrideLoopBody);
920        bb->moveAfter(iBuilder->GetInsertBlock());
921        iBuilder->SetInsertPoint(bb);
922    } else {
923        iBuilder->CreateCall(mDoBlockMethod, getInstance());
924    }
925}
926
927void Kernel::finalizeInstance() {
928    assert ("KernelBuilder does not have a valid IDISA Builder" && iBuilder);
929    mOutputScalarResult = iBuilder->CreateCall(getTerminateFunction(iBuilder->getModule()), { getInstance() });
930}
931
932Kernel::StreamPort Kernel::getStreamPort(const std::string & name) const {
933    const auto f = mStreamMap.find(name);
934    if (LLVM_UNLIKELY(f == mStreamMap.end())) {
935        report_fatal_error(getName() + " does not contain stream set " + name);
936    }
937    return f->second;
938}
939
940// CONSTRUCTOR
941Kernel::Kernel(std::string && kernelName,
942                             std::vector<Binding> && stream_inputs,
943                             std::vector<Binding> && stream_outputs,
944                             std::vector<Binding> && scalar_parameters,
945                             std::vector<Binding> && scalar_outputs,
946                             std::vector<Binding> && internal_scalars)
947: KernelInterface(std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
948, mCurrentMethod(nullptr)
949, mNoTerminateAttribute(false)
950, mIsGenerated(false)
951, mIsFinal(nullptr)
952, mOutputScalarResult(nullptr) {
953
954}
955
956Kernel::~Kernel() {
957
958}
959
960// CONSTRUCTOR
961BlockOrientedKernel::BlockOrientedKernel(std::string && kernelName,
962                                         std::vector<Binding> && stream_inputs,
963                                         std::vector<Binding> && stream_outputs,
964                                         std::vector<Binding> && scalar_parameters,
965                                         std::vector<Binding> && scalar_outputs,
966                                         std::vector<Binding> && internal_scalars)
967: Kernel(std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
968, mDoBlockMethod(nullptr)
969, mStrideLoopBody(nullptr)
970, mStrideLoopBranch(nullptr)
971, mStrideLoopTarget(nullptr) {
972
973}
974
975// CONSTRUCTOR
976SegmentOrientedKernel::SegmentOrientedKernel(std::string && kernelName,
977                                             std::vector<Binding> && stream_inputs,
978                                             std::vector<Binding> && stream_outputs,
979                                             std::vector<Binding> && scalar_parameters,
980                                             std::vector<Binding> && scalar_outputs,
981                                             std::vector<Binding> && internal_scalars)
982: Kernel(std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
983
984}
985
986}
Note: See TracBrowser for help on using the repository browser.