source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5395

Last change on this file since 5395 was 5395, checked in by cameron, 2 years ago

Uniquify kernel names with buffer types/sizes; update u8u16 to use ParabixDriver?

File size: 35.8 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
#include "kernel.h"
#include <toolchain.h>
#include <kernels/streamset.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/MDBuilder.h>
#include <llvm/IR/Module.h>
#include <llvm/Support/MathExtras.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <llvm/Transforms/Utils/Local.h>
17
// Names and name suffixes used when registering kernel state scalars and
// when naming the functions generated for a kernel.

// Suffix of the generated per-block function name.
static const char * const DO_BLOCK_SUFFIX = "_DoBlock";

static const char * const FINAL_BLOCK_SUFFIX = "_FinalBlock";

// Scalar field holding the logical segment number.
static const char * const LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";

// Appended to a stream set name to form its item-count scalar field names.
static const char * const PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";

static const char * const CONSUMED_ITEM_COUNT_SUFFIX = "_consumedItemCount";

static const char * const PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";

// Scalar field holding the termination flag.
static const char * const TERMINATION_SIGNAL = "terminationSignal";

// Appended to a stream set name to form its buffer pointer scalar field name.
static const char * const BUFFER_PTR_SUFFIX = "_bufferPtr";

static const char * const BLOCK_MASK_SUFFIX = "_blkMask";
35
36using namespace llvm;
37using namespace kernel;
38using namespace parabix;
39
40unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
41    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
42        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
43    }
44    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
45        report_fatal_error(getName() + " already contains scalar field " + name);
46    }
47    const auto index = mKernelFields.size();
48    mKernelMap.emplace(name, index);
49    mKernelFields.push_back(type);
50    return index;
51}
52
53unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
54    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
55        report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
56    }
57    const auto index = mKernelFields.size();
58    mKernelFields.push_back(type);
59    return index;
60}
61
62void KernelBuilder::prepareStreamSetNameMap() {
63    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
64        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, i);
65    }
66    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
67        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, i);
68    }
69}
70   
71void KernelBuilder::prepareKernel() {
72    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
73        report_fatal_error("Cannot prepare kernel after kernel state finalized");
74    }
75    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
76        std::string tmp;
77        raw_string_ostream out(tmp);
78        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
79            << mStreamSetInputs.size() << " input stream sets.";
80        report_fatal_error(out.str());
81    }
82    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
83        std::string tmp;
84        raw_string_ostream out(tmp);
85        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
86            << mStreamSetOutputs.size() << " output stream sets.";
87        report_fatal_error(out.str());
88    }
89    const auto blockSize = iBuilder->getBitBlockWidth();
90    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
91        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
92            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
93        }
94        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
95        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
96            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
97        }
98       
99    }
100    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
101        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
102        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
103            addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
104        }
105    }
106    for (const auto binding : mScalarInputs) {
107        addScalar(binding.type, binding.name);
108    }
109    for (const auto binding : mScalarOutputs) {
110        addScalar(binding.type, binding.name);
111    }
112    if (mStreamSetNameMap.empty()) {
113        prepareStreamSetNameMap();
114    }
115    for (auto binding : mInternalScalars) {
116        addScalar(binding.type, binding.name);
117    }
118    addScalar(iBuilder->getSizeTy(), LOGICAL_SEGMENT_NO_SCALAR);
119    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
120    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
121}
122
123std::unique_ptr<Module> KernelBuilder::createKernelStub() {
124    std::string cacheName = getName() + "_" + iBuilder->getBuilderUniqueName();
125    for (auto & b: mStreamSetInputBuffers) {
126        cacheName += ":" + b->getUniqueID();
127    }
128    for (auto & b: mStreamSetOutputBuffers) {
129        cacheName += ":" + b->getUniqueID();
130    }
131    return make_unique<Module>(cacheName, iBuilder->getContext());
132}
133
134void KernelBuilder::setCallParameters(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
135    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
136    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
137        if (LLVM_UNLIKELY(mStreamSetInputBuffers[i] == nullptr)) {
138            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
139                               + " cannot be null when calling generateKernel()");
140        }
141    }
142    if (LLVM_UNLIKELY(mStreamSetInputs.size() != mStreamSetInputBuffers.size())) {
143        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
144                           " input stream sets but generateKernel() was given "
145                           + std::to_string(mStreamSetInputBuffers.size()));
146    }
147   
148    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
149    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
150        if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i] == nullptr)) {
151            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
152                               + " cannot be null when calling generateKernel()");
153        }
154    }
155    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != mStreamSetOutputBuffers.size())) {
156        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
157                           + " output stream sets but generateKernel() was given "
158                           + std::to_string(mStreamSetOutputBuffers.size()));
159    }
160    prepareKernel(); // possibly overridden by the KernelBuilder subtype
161   
162}   
163
164
165// Default kernel signature: generate the IR and emit as byte code.
166void KernelBuilder::generateKernelSignature(std::string &signature) {
167    generateKernel();
168    raw_string_ostream OS(signature);
169    WriteBitcodeToFile(iBuilder->getModule(), OS);
170}
171
172
173std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
174    auto saveModule = iBuilder->getModule();
175    auto savePoint = iBuilder->saveIP();
176    auto module = createKernelStub();
177    iBuilder->setModule(module.get());
178    generateKernel(inputs, outputs);
179    iBuilder->setModule(saveModule);
180    iBuilder->restoreIP(savePoint);
181    return module;
182}
183
184void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
185    setCallParameters(inputs, outputs);
186    generateKernel();
187}
188
189void KernelBuilder::generateKernel() {
190    if (mIsGenerated) return;
191    auto savePoint = iBuilder->saveIP();
192    addKernelDeclarations(iBuilder->getModule());
193    callGenerateInitMethod();
194    callGenerateDoSegmentMethod();
195    // Implement the accumulator get functions
196    for (auto binding : mScalarOutputs) {
197        Function * f = getAccumulatorFunction(binding.name);
198        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
199        Value * self = &*(f->arg_begin());
200        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
201        Value * retVal = iBuilder->CreateLoad(ptr);
202        iBuilder->CreateRet(retVal);
203    }
204    iBuilder->restoreIP(savePoint);
205    mIsGenerated = true;
206}
207
208void KernelBuilder::callGenerateDoSegmentMethod() {
209    mCurrentMethod = getDoSegmentFunction();
210    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
211    auto args = mCurrentMethod->arg_begin();
212    mSelf = &*(args++);
213    Value * doFinal = &*(args++);
214    std::vector<Value *> producerPos;
215    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
216        producerPos.push_back(&*(args++));
217    }
218    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
219    iBuilder->CreateRetVoid();
220}
221
222void KernelBuilder::callGenerateInitMethod() {
223    mCurrentMethod = getInitFunction();
224    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
225    Function::arg_iterator args = mCurrentMethod->arg_begin();
226    mSelf = &*(args++);
227    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), mSelf);
228    for (auto binding : mScalarInputs) {
229        Value * param = &*(args++);
230        Value * ptr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
231        iBuilder->CreateStore(param, ptr);
232    }
233    generateInitMethod();
234    iBuilder->CreateRetVoid();
235}
236
237ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
238    const auto f = mKernelMap.find(name);
239    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
240        report_fatal_error(getName() + " does not contain scalar: " + name);
241    }
242    return iBuilder->getInt32(f->second);
243}
244
245unsigned KernelBuilder::getScalarCount() const {
246    return mKernelFields.size();
247}
248
249Value * KernelBuilder::getScalarFieldPtr(Value * instance, Value * index) const {
250    assert ("instance cannot be null!" && instance);
251    return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), index});
252}
253
254Value * KernelBuilder::getScalarFieldPtr(Value * instance, const std::string & fieldName) const {
255    assert ("instance cannot be null!" && instance);
256    return getScalarFieldPtr(instance, getScalarIndex(fieldName));
257}
258
259Value * KernelBuilder::getScalarField(Value * instance, const std::string & fieldName) const {
260    return iBuilder->CreateLoad(getScalarFieldPtr(instance, fieldName));
261}
262
263Value * KernelBuilder::getScalarField(Value * instance, Value * index) const {
264    assert ("instance cannot be null!" && instance);
265    return iBuilder->CreateLoad(getScalarFieldPtr(instance, index));
266}
267
268void KernelBuilder::setScalarField(Value * instance, const std::string & fieldName, Value * value) const {
269    assert ("instance cannot be null!" && instance);
270    iBuilder->CreateStore(value, getScalarFieldPtr(instance, fieldName));
271}
272
273void KernelBuilder::setScalarField(Value * instance, Value * index, Value * value) const {
274    assert ("instance cannot be null!" && instance);
275    iBuilder->CreateStore(value, getScalarFieldPtr(instance, index));
276}
277
278Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name, Value * doFinal) const {
279    assert ("instance cannot be null!" && instance);
280    unsigned ssIdx = getStreamSetIndex(name);
281    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
282        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
283        std::string principalField;
284        if (refSet.empty()) {
285            if (mStreamSetInputs.empty()) {
286                principalField = mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX;
287            } else {
288                principalField = mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
289            }
290        } else {
291            unsigned pfIndex = getStreamSetIndex(refSet);
292            if (mStreamSetInputs.size() > pfIndex && mStreamSetInputs[pfIndex].name == refSet) {
293               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
294            } else {
295               principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
296            }
297        }
298        Value * principalItemsProcessed = getScalarField(instance, principalField);
299        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
300    }
301    return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
302}
303
304llvm::Value * KernelBuilder::getConsumedItemCount(llvm::Value * instance, const std::string & name) const {
305    assert ("instance cannot be null!" && instance);
306    return getScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX);
307}
308
309Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
310    assert ("instance cannot be null!" && instance);
311    unsigned ssIdx = getStreamSetIndex(name);
312    if (mStreamSetInputs[ssIdx].rate.isExact()) {
313        std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
314        if (refSet.empty()) {
315            refSet = mStreamSetInputs[0].name;
316        }
317        Value * principalItemsProcessed = getScalarField(instance, refSet + PROCESSED_ITEM_COUNT_SUFFIX);
318        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
319    }
320    return getScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX);
321}
322
323void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
324    assert ("instance cannot be null!" && instance);
325    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
326}
327
328void KernelBuilder::setConsumedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
329    assert ("instance cannot be null!" && instance);
330    setScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX, value);
331}
332
333void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
334    assert ("instance cannot be null!" && instance);
335    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
336}
337
338void KernelBuilder::reserveBytes(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
339    assert ("instance cannot be null!" && instance);
340    Value * itemCount = getProducedItemCount(instance, name);
341    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
342    buf->reserveBytes(getStreamSetBufferPtr(name), iBuilder->CreateAdd(itemCount, value));
343}
344
345Value * KernelBuilder::getTerminationSignal(Value * instance) const {
346    assert ("instance cannot be null!" && instance);
347    return getScalarField(instance, TERMINATION_SIGNAL);
348}
349
350void KernelBuilder::setTerminationSignal(Value * instance) const {
351    assert ("instance cannot be null!" && instance);
352    setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getInt1(true));
353}
354
355LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * instance) const {
356    assert ("instance cannot be null!" && instance);
357    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
358}
359
360void KernelBuilder::releaseLogicalSegmentNo(Value * instance, Value * newCount) const {
361    assert ("instance cannot be null!" && instance);
362    iBuilder->CreateAtomicStoreRelease(newCount, getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
363}
364
365inline Value * KernelBuilder::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
366    for (const Binding & b : bindings) {
367        if (b.name == name) {
368            const auto divisor = iBuilder->getBitBlockWidth();
369            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
370                return iBuilder->CreateLShr(itemCount, std::log2(divisor));
371            } else {
372                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
373            }
374        }
375    }
376    report_fatal_error("Error: no binding in " + getName() + " for " + name);
377}
378
379Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
380    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
381    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
382    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
383}
384
385Value * KernelBuilder::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
386    return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
387}
388
389Value * KernelBuilder::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
390    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
391    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
392    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
393}
394
395Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
396    return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
397}
398
399llvm::Value * KernelBuilder::getInputStreamSetCount(const std::string & name) const {
400    return getInputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
401}
402
403llvm::Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
404    Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
405    blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
406    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
407    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
408}
409
410Value * KernelBuilder::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
411    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
412    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
413    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
414}
415
416void KernelBuilder::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
417    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
418}
419
420Value * KernelBuilder::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
421    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
422    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
423    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
424}
425
426void KernelBuilder::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
427    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
428}
429
430llvm::Value * KernelBuilder::getOutputStreamSetCount(const std::string & name) const {
431    return getOutputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
432}
433
434Value * KernelBuilder::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
435    return getInputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
436}
437
438Value * KernelBuilder::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
439    return getOutputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
440}
441
442unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
443    const auto f = mStreamSetNameMap.find(name);
444    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
445        report_fatal_error(getName() + " does not contain stream set: " + name);
446    }
447    return f->second;
448}
449
450Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
451    return getScalarField(getSelf(), name + BUFFER_PTR_SUFFIX);
452}
453
454Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
455    for (auto & arg : f->getArgumentList()) {
456        if (arg.getName().equals(name)) {
457            return &arg;
458        }
459    }
460    report_fatal_error(getName() + " does not have parameter " + name);
461}
462
463Value * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
464    return iBuilder->CreateCall(getDoSegmentFunction(), args);
465}
466
467Value * KernelBuilder::createGetAccumulatorCall(Value * self, const std::string & accumName) const {
468    return iBuilder->CreateCall(getAccumulatorFunction(accumName), {self});
469}
470
471BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
472    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
473}
474
475void KernelBuilder::createInstance() {
476    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
477        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
478    }
479    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
480
481    std::vector<Value *> args;
482    args.reserve(mInitialArguments.size() + mStreamSetInputBuffers.size() + mStreamSetOutputBuffers.size() + 1);
483    args.push_back(mKernelInstance);
484    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
485        Value * arg = mInitialArguments[i];
486        if (LLVM_UNLIKELY(arg == nullptr)) {
487            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
488                               + " cannot be null when calling createInstance()");
489        }
490        args.push_back(arg);
491    }
492    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
493        assert (mStreamSetInputBuffers[i]);
494        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
495        if (LLVM_UNLIKELY(arg == nullptr)) {
496            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
497                               + " was not allocated prior to calling createInstance()");
498        }
499        args.push_back(arg);
500    }
501    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
502    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
503        assert (mStreamSetOutputBuffers[i]);
504        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
505        if (LLVM_UNLIKELY(arg == nullptr)) {
506            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
507                               + " was not allocated prior to calling createInstance()");
508        }
509        args.push_back(arg);
510    }
511    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
512    iBuilder->CreateCall(getInitFunction(), args);
513}
514
515//  The default doSegment method dispatches to the doBlock routine for
516//  each block of the given number of blocksToDo, and then updates counts.
517
518void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
519
520    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
521    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
522    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
523    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
524    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
525    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
526
527    Value * baseTarget = nullptr;
528    if (useIndirectBr()) {
529        baseTarget = iBuilder->CreateSelect(doFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
530    }
531
532    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
533    Value * availablePos = producerPos[0];
534    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
535    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
536    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
537
538    iBuilder->CreateBr(strideLoopCond);
539
540    iBuilder->SetInsertPoint(strideLoopCond);
541
542    PHINode * branchTarget = nullptr;
543    if (useIndirectBr()) {
544        branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
545        branchTarget->addIncoming(baseTarget, entryBlock);
546    }
547
548    PHINode * stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
549    stridesRemaining->addIncoming(stridesToDo, entryBlock);
550    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
551    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
552    Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
553    iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
554
555    iBuilder->SetInsertPoint(mStrideLoopBody);
556
557    if (useIndirectBr()) {
558        mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
559        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
560    }
561
562    /// GENERATE DO BLOCK METHOD
563
564    writeDoBlockMethod();
565
566    /// UPDATE PROCESSED COUNTS
567
568    processed = getProcessedItemCount(mStreamSetInputs[0].name);
569    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
570    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
571
572    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
573
574    BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
575    if (useIndirectBr()) {
576        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
577    }
578    iBuilder->CreateBr(strideLoopCond);
579
580    stridesDone->moveAfter(bodyEnd);
581
582    iBuilder->SetInsertPoint(stridesDone);
583
584    // Now conditionally perform the final block processing depending on the doFinal parameter.
585    if (useIndirectBr()) {
586        mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
587        mStrideLoopBranch->addDestination(doFinalBlock);
588        mStrideLoopBranch->addDestination(segmentDone);
589    } else {
590        iBuilder->CreateUnlikelyCondBr(doFinal, doFinalBlock, segmentDone);
591    }
592
593    doFinalBlock->moveAfter(stridesDone);
594
595    iBuilder->SetInsertPoint(doFinalBlock);
596
597    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
598    writeFinalBlockMethod(remainingItems);
599
600    itemsDone = producerPos[0];
601    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
602    setTerminationSignal();
603    iBuilder->CreateBr(segmentDone);
604
605    segmentDone->moveAfter(iBuilder->GetInsertBlock());
606
607    iBuilder->SetInsertPoint(segmentDone);
608
609    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
610    if (useIndirectBr()) {
611        MDBuilder mdb(iBuilder->getContext());
612        const auto destinations = mStrideLoopBranch->getNumDestinations();
613        uint32_t weights[destinations];
614        for (unsigned i = 0; i < destinations; ++i) {
615            weights[i] = (mStrideLoopBranch->getDestination(i) == segmentDone) ? 100 : 1;
616        }
617        ArrayRef<uint32_t> bw(weights, destinations);
618        mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
619    }
620
621}
622
623inline void BlockOrientedKernel::writeDoBlockMethod() {
624
625    Value * const self = mSelf;
626    Function * const cp = mCurrentMethod;
627    auto ip = iBuilder->saveIP();
628
629    /// Check if the do block method is called and create the function if necessary   
630    if (!useIndirectBr()) {
631        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
632        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
633        mCurrentMethod->setCallingConv(CallingConv::C);
634        mCurrentMethod->setDoesNotThrow();
635        mCurrentMethod->setDoesNotCapture(1);
636        auto args = mCurrentMethod->arg_begin();
637        mCurrentMethod = mCurrentMethod;
638        mSelf = &*args;
639        mSelf->setName("self");
640        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
641    }
642
643    std::vector<Value *> priorProduced;
644    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
645        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
646            priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
647        }
648    }
649
650    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
651
652    unsigned priorIdx = 0;
653    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
654        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
655        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
656            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
657            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
658            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
659            Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
660            Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
661            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
662            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(instance, priorBlock);
663            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
664            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
665            iBuilder->CreateCondBr(wraparound, copyBack, done);
666            iBuilder->SetInsertPoint(copyBack);
667            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
668            cb->createCopyBack(instance, copyItems);
669            iBuilder->CreateBr(done);
670            iBuilder->SetInsertPoint(done);
671            priorIdx++;
672        }
673        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
674            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
675            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
676            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
677            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
678            Value * accessible = cb->getLinearlyAccessibleItems(instance, priorProduced[priorIdx]);
679            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
680            iBuilder->CreateCondBr(wraparound, copyBack, done);
681            iBuilder->SetInsertPoint(copyBack);
682            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
683            cb->createCopyBack(instance, copyItems);
684            iBuilder->CreateBr(done);
685            iBuilder->SetInsertPoint(done);
686            priorIdx++;
687        }
688    }
689
690    /// Call the do block method if necessary then restore the current function state to the do segement method
691    if (!useIndirectBr()) {
692        iBuilder->CreateRetVoid();
693        mDoBlockMethod = mCurrentMethod;
694        iBuilder->restoreIP(ip);
695        iBuilder->CreateCall(mCurrentMethod, self);
696        mSelf = self;
697        mCurrentMethod = cp;
698    }
699
700}
701
702inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
703
704    Value * const self = mSelf;
705    Function * const cp = mCurrentMethod;
706    Value * const remainingItemCount = remainingItems;
707    auto ip = iBuilder->saveIP();
708
709    if (!useIndirectBr()) {
710        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
711        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
712        mCurrentMethod->setCallingConv(CallingConv::C);
713        mCurrentMethod->setDoesNotThrow();
714        mCurrentMethod->setDoesNotCapture(1);
715        auto args = mCurrentMethod->arg_begin();
716        mSelf = &*args;
717        mSelf->setName("self");
718        remainingItems = &*(++args);
719        remainingItems->setName("remainingItems");
720        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
721    }
722
723    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
724
725    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
726
727    if (!useIndirectBr()) {
728        iBuilder->CreateRetVoid();       
729        iBuilder->restoreIP(ip);
730        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
731        mCurrentMethod = cp;
732        mSelf = self;
733    }
734
735}
736
737//  The default finalBlock method simply dispatches to the doBlock routine.
738void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
739    CreateDoBlockMethodCall();
740}
741
742void BlockOrientedKernel::CreateDoBlockMethodCall() {
743    if (useIndirectBr()) {
744        BasicBlock * bb = CreateBasicBlock("resume");
745        mStrideLoopBranch->addDestination(bb);
746        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
747        iBuilder->CreateBr(mStrideLoopBody);
748        bb->moveAfter(iBuilder->GetInsertBlock());
749        iBuilder->SetInsertPoint(bb);
750    } else {
751        iBuilder->CreateCall(mDoBlockMethod, mSelf);
752    }
753}
754
755// CONSTRUCTOR
756
757BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
758                                                           std::string && kernelName,
759                                                           std::vector<Binding> && stream_inputs,
760                                                           std::vector<Binding> && stream_outputs,
761                                                           std::vector<Binding> && scalar_parameters,
762                                                           std::vector<Binding> && scalar_outputs,
763                                                           std::vector<Binding> && internal_scalars)
764: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
765, mDoBlockMethod(nullptr)
766, mStrideLoopBody(nullptr)
767, mStrideLoopBranch(nullptr)
768, mStrideLoopTarget(nullptr) {
769
770}
771
772
773// CONSTRUCTOR
774KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
775                             std::string && kernelName,
776                             std::vector<Binding> && stream_inputs,
777                             std::vector<Binding> && stream_outputs,
778                             std::vector<Binding> && scalar_parameters,
779                             std::vector<Binding> && scalar_outputs,
780                             std::vector<Binding> && internal_scalars)
781: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
782, mSelf(nullptr)
783, mCurrentMethod(nullptr)
784, mNoTerminateAttribute(false) 
785, mIsGenerated(false) {
786
787}
788
789KernelBuilder::~KernelBuilder() { }
790
791// CONSTRUCTOR
792SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
793                                             std::string && kernelName,
794                                             std::vector<Binding> && stream_inputs,
795                                             std::vector<Binding> && stream_outputs,
796                                             std::vector<Binding> && scalar_parameters,
797                                             std::vector<Binding> && scalar_outputs,
798                                             std::vector<Binding> && internal_scalars)
799: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
800
801}
Note: See TracBrowser for help on using the repository browser.