source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5379

Last change on this file since 5379 was 5379, checked in by nmedfort, 2 years ago

Bug fixes for last check in

File size: 35.7 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain.h>
8#include <kernels/streamset.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Function.h>
11#include <llvm/IR/Instructions.h>
12#include <llvm/IR/MDBuilder.h>
13#include <llvm/IR/Module.h>
14#include <llvm/Support/raw_ostream.h>
15#include <llvm/Transforms/Utils/Local.h>
16
// Suffixes appended to the kernel name to form the names of generated functions.
static const char * const DO_BLOCK_SUFFIX = "_DoBlock";
static const char * const FINAL_BLOCK_SUFFIX = "_FinalBlock";

// Names of the bookkeeping scalars stored in every kernel's state struct.
static const char * const LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";
static const char * const TERMINATION_SIGNAL = "terminationSignal";

// Suffixes appended to a stream set name to form the names of its per-stream-set scalars.
static const char * const PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";
static const char * const PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";
static const char * const BUFFER_PTR_SUFFIX = "_bufferPtr";
static const char * const BLOCK_MASK_SUFFIX = "_blkMask";
32
33using namespace llvm;
34using namespace kernel;
35using namespace parabix;
36
37unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
38    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
39        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
40    }
41    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
42        report_fatal_error(getName() + " already contains scalar field " + name);
43    }
44    const auto index = mKernelFields.size();
45    mKernelMap.emplace(name, index);
46    mKernelFields.push_back(type);
47    return index;
48}
49
50unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
51    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
52        report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
53    }
54    const auto index = mKernelFields.size();
55    mKernelFields.push_back(type);
56    return index;
57}
58
59void KernelBuilder::prepareKernelSignature() {
60    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
61        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, i);
62    }
63    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
64        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, i);
65    }
66}
67   
68void KernelBuilder::prepareKernel() {
69    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
70        report_fatal_error("Cannot prepare kernel after kernel state finalized");
71    }
72    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
73        std::string tmp;
74        raw_string_ostream out(tmp);
75        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
76            << mStreamSetInputs.size() << " input stream sets.";
77        report_fatal_error(out.str());
78    }
79    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
80        std::string tmp;
81        raw_string_ostream out(tmp);
82        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
83            << mStreamSetOutputs.size() << " output stream sets.";
84        report_fatal_error(out.str());
85    }
86    const auto blockSize = iBuilder->getBitBlockWidth();
87    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
88        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
89            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
90        }
91        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
92        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
93            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
94        }
95       
96    }
97    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
98        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
99        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
100            addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
101        }
102    }
103    for (const auto binding : mScalarInputs) {
104        addScalar(binding.type, binding.name);
105    }
106    for (const auto binding : mScalarOutputs) {
107        addScalar(binding.type, binding.name);
108    }
109    if (mStreamSetNameMap.empty()) {
110        prepareKernelSignature();
111    }
112    for (auto binding : mInternalScalars) {
113        addScalar(binding.type, binding.name);
114    }
115    addScalar(iBuilder->getSizeTy(), LOGICAL_SEGMENT_NO_SCALAR);
116    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
117    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
118}
119
120std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
121    auto saveModule = iBuilder->getModule();
122    auto savePoint = iBuilder->saveIP();
123    auto module = make_unique<Module>(getName() + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
124    iBuilder->setModule(module.get());
125    generateKernel(inputs, outputs);
126    iBuilder->setModule(saveModule);
127    iBuilder->restoreIP(savePoint);
128    return module;
129}
130
131void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
132
133    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
134    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
135        if (LLVM_UNLIKELY(mStreamSetInputBuffers[i] == nullptr)) {
136            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
137                               + " cannot be null when calling generateKernel()");
138        }
139    }
140    if (LLVM_UNLIKELY(mStreamSetInputs.size() != mStreamSetInputBuffers.size())) {
141        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
142                           " input stream sets but generateKernel() was given "
143                           + std::to_string(mStreamSetInputBuffers.size()));
144    }
145
146    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
147    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
148        if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i] == nullptr)) {
149            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
150                               + " cannot be null when calling generateKernel()");
151        }
152    }
153    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != mStreamSetOutputBuffers.size())) {
154        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
155                           + " output stream sets but generateKernel() was given "
156                           + std::to_string(mStreamSetOutputBuffers.size()));
157    }
158
159
160    auto savePoint = iBuilder->saveIP();
161    prepareKernel(); // possibly overridden by the KernelBuilder subtype
162    addKernelDeclarations(iBuilder->getModule());
163    callGenerateInitMethod();
164    callGenerateDoSegmentMethod();
165    // Implement the accumulator get functions
166    for (auto binding : mScalarOutputs) {
167        Function * f = getAccumulatorFunction(binding.name);
168        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
169        Value * self = &*(f->arg_begin());
170        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
171        Value * retVal = iBuilder->CreateLoad(ptr);
172        iBuilder->CreateRet(retVal);
173    }
174    iBuilder->restoreIP(savePoint);
175}
176
177void KernelBuilder::callGenerateDoSegmentMethod() {
178    mCurrentMethod = getDoSegmentFunction();
179    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
180    auto args = mCurrentMethod->arg_begin();
181    mSelf = &*(args++);
182    Value * doFinal = &*(args++);
183    std::vector<Value *> producerPos;
184    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
185        producerPos.push_back(&*(args++));
186    }
187    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
188    iBuilder->CreateRetVoid();
189}
190
191void KernelBuilder::callGenerateInitMethod() {
192    mCurrentMethod = getInitFunction();
193    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
194    Function::arg_iterator args = mCurrentMethod->arg_begin();
195    mSelf = &*(args++);
196    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), mSelf);
197    for (auto binding : mScalarInputs) {
198        Value * param = &*(args++);
199        Value * ptr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
200        iBuilder->CreateStore(param, ptr);
201    }
202    generateInitMethod();
203    iBuilder->CreateRetVoid();
204}
205
206ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
207    const auto f = mKernelMap.find(name);
208    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
209        report_fatal_error(getName() + " does not contain scalar: " + name);
210    }
211    return iBuilder->getInt32(f->second);
212}
213
214unsigned KernelBuilder::getScalarCount() const {
215    return mKernelFields.size();
216}
217
218Value * KernelBuilder::getScalarFieldPtr(Value * instance, Value * index) const {
219    assert ("instance cannot be null!" && instance);
220    return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), index});
221}
222
223Value * KernelBuilder::getScalarFieldPtr(Value * instance, const std::string & fieldName) const {
224    assert ("instance cannot be null!" && instance);
225    return getScalarFieldPtr(instance, getScalarIndex(fieldName));
226}
227
228Value * KernelBuilder::getScalarField(Value * instance, const std::string & fieldName) const {
229    return iBuilder->CreateLoad(getScalarFieldPtr(instance, fieldName));
230}
231
232Value * KernelBuilder::getScalarField(Value * instance, Value * index) const {
233    assert ("instance cannot be null!" && instance);
234    return iBuilder->CreateLoad(getScalarFieldPtr(instance, index));
235}
236
237void KernelBuilder::setScalarField(Value * instance, const std::string & fieldName, Value * value) const {
238    assert ("instance cannot be null!" && instance);
239    iBuilder->CreateStore(value, getScalarFieldPtr(instance, fieldName));
240}
241
242void KernelBuilder::setScalarField(Value * instance, Value * index, Value * value) const {
243    assert ("instance cannot be null!" && instance);
244    iBuilder->CreateStore(value, getScalarFieldPtr(instance, index));
245}
246
247Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
248    assert ("instance cannot be null!" && instance);
249    unsigned ssIdx = getStreamSetIndex(name);
250    if (mStreamSetInputs[ssIdx].rate.isExact()) {
251        std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
252        if (refSet.empty()) {
253            refSet = mStreamSetInputs[0].name;
254        }
255        Value * principalItemsProcessed = getScalarField(instance, refSet + PROCESSED_ITEM_COUNT_SUFFIX);
256        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
257    }
258    return getScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX);
259}
260
261Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name, Value * doFinal) const {
262    assert ("instance cannot be null!" && instance);
263    unsigned ssIdx = getStreamSetIndex(name);
264    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
265        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
266        std::string principalField;
267        if (refSet.empty()) {
268            if (mStreamSetInputs.empty()) {
269                principalField = mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX;
270            } else {
271                principalField = mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
272            }
273        } else {
274            unsigned pfIndex = getStreamSetIndex(refSet);
275            if (mStreamSetInputs.size() > pfIndex && mStreamSetInputs[pfIndex].name == refSet) {
276               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
277            } else {
278               principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
279            }
280        }
281        Value * principalItemsProcessed = getScalarField(instance, principalField);
282        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
283    }
284    return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
285}
286
287Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name) const {
288    assert ("instance cannot be null!" && instance);
289    unsigned ssIdx = getStreamSetIndex(name);
290    std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
291    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
292        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
293        std::string principalField;
294        if (refSet.empty()) {
295            principalField = mStreamSetInputs.empty() ? mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX : mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
296        } else {
297            unsigned pfIndex = getStreamSetIndex(refSet);
298            if (mStreamSetInputs.size() > pfIndex && mStreamSetInputs[pfIndex].name == refSet) {
299               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
300            } else {
301               principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
302            }
303        }
304        Value * principalItemsProcessed = getScalarField(instance, principalField);
305        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
306    }
307    return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
308}
309
310void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
311    assert ("instance cannot be null!" && instance);
312    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
313}
314
315void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
316    assert ("instance cannot be null!" && instance);
317    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
318}
319
320void KernelBuilder::reserveBytes(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
321    assert ("instance cannot be null!" && instance);
322    Value * itemCount = getProducedItemCount(instance, name);
323    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
324    buf->reserveBytes(getStreamSetBufferPtr(name), itemCount, value);
325}
326
327Value * KernelBuilder::getTerminationSignal(Value * instance) const {
328    assert ("instance cannot be null!" && instance);
329    return getScalarField(instance, TERMINATION_SIGNAL);
330}
331
332void KernelBuilder::setTerminationSignal(Value * instance) const {
333    assert ("instance cannot be null!" && instance);
334    setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getInt1(true));
335}
336
337LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * instance) const {
338    assert ("instance cannot be null!" && instance);
339    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
340}
341
342void KernelBuilder::releaseLogicalSegmentNo(Value * instance, Value * newCount) const {
343    assert ("instance cannot be null!" && instance);
344    iBuilder->CreateAtomicStoreRelease(newCount, getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
345}
346
347inline Value * KernelBuilder::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
348    for (const Binding & b : bindings) {
349        if (b.name == name) {
350            const auto divisor = iBuilder->getBitBlockWidth();
351            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
352                return iBuilder->CreateLShr(itemCount, std::log2(divisor));
353            } else {
354                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
355            }
356        }
357    }
358    report_fatal_error("Error: no binding in " + getName() + " for " + name);
359}
360
361Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
362    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
363    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
364    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
365}
366
367Value * KernelBuilder::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
368    return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
369}
370
371Value * KernelBuilder::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
372    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
373    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
374    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
375}
376
377Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
378    return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
379}
380
381llvm::Value * KernelBuilder::getInputStreamSetCount(const std::string & name) const {
382    return getInputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
383}
384
385llvm::Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
386    Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
387    blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
388    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
389    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
390}
391
392Value * KernelBuilder::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
393    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
394    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
395    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
396}
397
398void KernelBuilder::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
399    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
400}
401
402Value * KernelBuilder::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
403    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
404    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
405    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
406}
407
408void KernelBuilder::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
409    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
410}
411
412llvm::Value * KernelBuilder::getOutputStreamSetCount(const std::string & name) const {
413    return getOutputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
414}
415
416Value * KernelBuilder::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
417    return getInputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
418}
419
420Value * KernelBuilder::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
421    return getOutputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
422}
423
424unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
425    const auto f = mStreamSetNameMap.find(name);
426    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
427        report_fatal_error(getName() + " does not contain stream set: " + name);
428    }
429    return f->second;
430}
431
432Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
433    return getScalarField(getSelf(), name + BUFFER_PTR_SUFFIX);
434}
435
436Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
437    for (auto & arg : f->getArgumentList()) {
438        if (arg.getName().equals(name)) {
439            return &arg;
440        }
441    }
442    report_fatal_error(getName() + " does not have parameter " + name);
443}
444
445Value * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
446    return iBuilder->CreateCall(getDoSegmentFunction(), args);
447}
448
449Value * KernelBuilder::createGetAccumulatorCall(Value * self, const std::string & accumName) const {
450    return iBuilder->CreateCall(getAccumulatorFunction(accumName), {self});
451}
452
453BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
454    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
455}
456
457void KernelBuilder::createInstance() {
458    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
459        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
460    }
461    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
462
463    std::vector<Value *> args;
464    args.reserve(mInitialArguments.size() + mStreamSetInputBuffers.size() + mStreamSetOutputBuffers.size() + 1);
465    args.push_back(mKernelInstance);
466    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
467        Value * arg = mInitialArguments[i];
468        if (LLVM_UNLIKELY(arg == nullptr)) {
469            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
470                               + " cannot be null when calling createInstance()");
471        }
472        args.push_back(arg);
473    }
474    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
475        assert (mStreamSetInputBuffers[i]);
476        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
477        if (LLVM_UNLIKELY(arg == nullptr)) {
478            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
479                               + " was not allocated prior to calling createInstance()");
480        }
481        args.push_back(arg);
482    }
483    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
484    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
485        assert (mStreamSetOutputBuffers[i]);
486        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
487        if (LLVM_UNLIKELY(arg == nullptr)) {
488            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
489                               + " was not allocated prior to calling createInstance()");
490        }
491        args.push_back(arg);
492    }
493    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
494    iBuilder->CreateCall(getInitFunction(), args);
495}
496
497//  The default doSegment method dispatches to the doBlock routine for
498//  each block of the given number of blocksToDo, and then updates counts.
499
500void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
501
502    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
503    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
504    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
505    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
506    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
507    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
508
509    Value * baseTarget = nullptr;
510    if (useIndirectBr()) {
511        baseTarget = iBuilder->CreateSelect(doFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
512    }
513
514    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
515    Value * availablePos = producerPos[0];
516    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
517        Value * p = producerPos[i];
518        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
519    }
520
521    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
522    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
523    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
524
525    iBuilder->CreateBr(strideLoopCond);
526
527    iBuilder->SetInsertPoint(strideLoopCond);
528
529    PHINode * branchTarget = nullptr;
530    if (useIndirectBr()) {
531        branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
532        branchTarget->addIncoming(baseTarget, entryBlock);
533    }
534
535    PHINode * stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
536    stridesRemaining->addIncoming(stridesToDo, entryBlock);
537    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
538    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
539    Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
540    iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
541
542    iBuilder->SetInsertPoint(mStrideLoopBody);
543
544    if (useIndirectBr()) {
545        mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
546        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
547    }
548
549    /// GENERATE DO BLOCK METHOD
550
551    writeDoBlockMethod();
552
553    /// UPDATE PROCESSED COUNTS
554
555    processed = getProcessedItemCount(mStreamSetInputs[0].name);
556    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
557    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
558
559    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
560
561    BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
562    if (useIndirectBr()) {
563        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
564    }
565    iBuilder->CreateBr(strideLoopCond);
566
567    stridesDone->moveAfter(bodyEnd);
568
569    iBuilder->SetInsertPoint(stridesDone);
570
571    // Now conditionally perform the final block processing depending on the doFinal parameter.
572    if (useIndirectBr()) {
573        mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
574        mStrideLoopBranch->addDestination(doFinalBlock);
575        mStrideLoopBranch->addDestination(segmentDone);
576    } else {
577        iBuilder->CreateUnlikelyCondBr(doFinal, doFinalBlock, segmentDone);
578    }
579
580    doFinalBlock->moveAfter(stridesDone);
581
582    iBuilder->SetInsertPoint(doFinalBlock);
583
584    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
585    writeFinalBlockMethod(remainingItems);
586
587    itemsDone = producerPos[0];
588    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
589    setTerminationSignal();
590    iBuilder->CreateBr(segmentDone);
591
592    segmentDone->moveAfter(iBuilder->GetInsertBlock());
593
594    iBuilder->SetInsertPoint(segmentDone);
595
596    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
597    if (useIndirectBr()) {
598        MDBuilder mdb(iBuilder->getContext());
599        const auto destinations = mStrideLoopBranch->getNumDestinations();
600        uint32_t weights[destinations];
601        for (unsigned i = 0; i < destinations; ++i) {
602            weights[i] = (mStrideLoopBranch->getDestination(i) == segmentDone) ? 100 : 1;
603        }
604        ArrayRef<uint32_t> bw(weights, destinations);
605        mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
606    }
607
608}
609
610inline void BlockOrientedKernel::writeDoBlockMethod() {
611
612    Value * const self = mSelf;
613    Function * const cp = mCurrentMethod;
614    auto ip = iBuilder->saveIP();
615
616    /// Check if the do block method is called and create the function if necessary   
617    if (!useIndirectBr()) {
618        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
619        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
620        mCurrentMethod->setCallingConv(CallingConv::C);
621        mCurrentMethod->setDoesNotThrow();
622        mCurrentMethod->setDoesNotCapture(1);
623        auto args = mCurrentMethod->arg_begin();
624        mCurrentMethod = mCurrentMethod;
625        mSelf = &*args;
626        mSelf->setName("self");
627        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
628    }
629
630    std::vector<Value *> priorProduced;
631    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
632        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
633            priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
634        }
635    }
636
637    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
638
639    unsigned priorIdx = 0;
640    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
641        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
642        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
643            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
644            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
645            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
646            Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
647            Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
648            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
649            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(instance, priorBlock);
650            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
651            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
652            iBuilder->CreateCondBr(wraparound, copyBack, done);
653            iBuilder->SetInsertPoint(copyBack);
654            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
655            cb->createCopyBack(instance, copyItems);
656            iBuilder->CreateBr(done);
657            iBuilder->SetInsertPoint(done);
658            priorIdx++;
659        }
660        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
661            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
662            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
663            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
664            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
665            Value * accessible = cb->getLinearlyAccessibleItems(instance, priorProduced[priorIdx]);
666            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
667            iBuilder->CreateCondBr(wraparound, copyBack, done);
668            iBuilder->SetInsertPoint(copyBack);
669            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
670            cb->createCopyBack(instance, copyItems);
671            iBuilder->CreateBr(done);
672            iBuilder->SetInsertPoint(done);
673            priorIdx++;
674        }
675    }
676
677    /// Call the do block method if necessary then restore the current function state to the do segement method
678    if (!useIndirectBr()) {
679        iBuilder->CreateRetVoid();
680        mDoBlockMethod = mCurrentMethod;
681        iBuilder->restoreIP(ip);
682        iBuilder->CreateCall(mCurrentMethod, self);
683        mSelf = self;
684        mCurrentMethod = cp;
685    }
686
687}
688
689inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
690
691    Value * const self = mSelf;
692    Function * const cp = mCurrentMethod;
693    Value * const remainingItemCount = remainingItems;
694    auto ip = iBuilder->saveIP();
695
696    if (!useIndirectBr()) {
697        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
698        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
699        mCurrentMethod->setCallingConv(CallingConv::C);
700        mCurrentMethod->setDoesNotThrow();
701        mCurrentMethod->setDoesNotCapture(1);
702        auto args = mCurrentMethod->arg_begin();
703        mSelf = &*args;
704        mSelf->setName("self");
705        remainingItems = &*(++args);
706        remainingItems->setName("remainingItems");
707        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
708    }
709
710    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
711
712    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
713
714    if (!useIndirectBr()) {
715        iBuilder->CreateRetVoid();       
716        iBuilder->restoreIP(ip);
717        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
718        mCurrentMethod = cp;
719        mSelf = self;
720    }
721
722}
723
724//  The default finalBlock method simply dispatches to the doBlock routine.
725void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
726    CreateDoBlockMethodCall();
727}
728
729void BlockOrientedKernel::CreateDoBlockMethodCall() {
730    if (useIndirectBr()) {
731        BasicBlock * bb = CreateBasicBlock("resume");
732        mStrideLoopBranch->addDestination(bb);
733        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
734        iBuilder->CreateBr(mStrideLoopBody);
735        bb->moveAfter(iBuilder->GetInsertBlock());
736        iBuilder->SetInsertPoint(bb);
737    } else {
738        iBuilder->CreateCall(mDoBlockMethod, mSelf);
739    }
740}
741
742// CONSTRUCTOR
743
744BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
745                                                           std::string && kernelName,
746                                                           std::vector<Binding> && stream_inputs,
747                                                           std::vector<Binding> && stream_outputs,
748                                                           std::vector<Binding> && scalar_parameters,
749                                                           std::vector<Binding> && scalar_outputs,
750                                                           std::vector<Binding> && internal_scalars)
751: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
752, mDoBlockMethod(nullptr)
753, mStrideLoopBody(nullptr)
754, mStrideLoopBranch(nullptr)
755, mStrideLoopTarget(nullptr) {
756
757}
758
759
760// CONSTRUCTOR
761KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
762                             std::string && kernelName,
763                             std::vector<Binding> && stream_inputs,
764                             std::vector<Binding> && stream_outputs,
765                             std::vector<Binding> && scalar_parameters,
766                             std::vector<Binding> && scalar_outputs,
767                             std::vector<Binding> && internal_scalars)
768: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
769, mSelf(nullptr)
770, mCurrentMethod(nullptr)
771, mNoTerminateAttribute(false) {
772
773}
774
775KernelBuilder::~KernelBuilder() { }
776
777// CONSTRUCTOR
778SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
779                                             std::string && kernelName,
780                                             std::vector<Binding> && stream_inputs,
781                                             std::vector<Binding> && stream_outputs,
782                                             std::vector<Binding> && scalar_parameters,
783                                             std::vector<Binding> && scalar_outputs,
784                                             std::vector<Binding> && internal_scalars)
785: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
786
787}
Note: See TracBrowser for help on using the repository browser.