source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5390

Last change on this file since 5390 was 5390, checked in by nmedfort, 2 years ago

Minor changes in preparation of incorporating a consumed stream set position into select kernels.

File size: 35.0 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain.h>
8#include <kernels/streamset.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Function.h>
11#include <llvm/IR/Instructions.h>
12#include <llvm/IR/MDBuilder.h>
13#include <llvm/IR/Module.h>
14#include <llvm/Support/raw_ostream.h>
15#include <llvm/Transforms/Utils/Local.h>
16
// Suffixes appended to a kernel's name to name its generated block-level functions.
static const char * const DO_BLOCK_SUFFIX = "_DoBlock";
static const char * const FINAL_BLOCK_SUFFIX = "_FinalBlock";

// Names of the scalar fields that prepareKernel() adds to every kernel state.
static const char * const LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";
static const char * const TERMINATION_SIGNAL = "terminationSignal";

// Suffixes appended to a stream set's name to name its per-stream-set scalar fields.
static const char * const PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";
static const char * const CONSUMED_ITEM_COUNT_SUFFIX = "_consumedItemCount";
static const char * const PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";
static const char * const BUFFER_PTR_SUFFIX = "_bufferPtr";

// NOTE(review): not referenced in the portion of this file that was reviewed.
static const char * const BLOCK_MASK_SUFFIX = "_blkMask";
34
35using namespace llvm;
36using namespace kernel;
37using namespace parabix;
38
39unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
40    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
41        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
42    }
43    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
44        report_fatal_error(getName() + " already contains scalar field " + name);
45    }
46    const auto index = mKernelFields.size();
47    mKernelMap.emplace(name, index);
48    mKernelFields.push_back(type);
49    return index;
50}
51
52unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
53    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
54        report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
55    }
56    const auto index = mKernelFields.size();
57    mKernelFields.push_back(type);
58    return index;
59}
60
61void KernelBuilder::prepareKernelSignature() {
62    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
63        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, i);
64    }
65    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
66        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, i);
67    }
68}
69   
70void KernelBuilder::prepareKernel() {
71    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
72        report_fatal_error("Cannot prepare kernel after kernel state finalized");
73    }
74    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
75        std::string tmp;
76        raw_string_ostream out(tmp);
77        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
78            << mStreamSetInputs.size() << " input stream sets.";
79        report_fatal_error(out.str());
80    }
81    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
82        std::string tmp;
83        raw_string_ostream out(tmp);
84        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
85            << mStreamSetOutputs.size() << " output stream sets.";
86        report_fatal_error(out.str());
87    }
88    const auto blockSize = iBuilder->getBitBlockWidth();
89    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
90        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
91            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
92        }
93        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
94        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
95            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
96        }
97       
98    }
99    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
100        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
101        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
102            addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
103        }
104    }
105    for (const auto binding : mScalarInputs) {
106        addScalar(binding.type, binding.name);
107    }
108    for (const auto binding : mScalarOutputs) {
109        addScalar(binding.type, binding.name);
110    }
111    if (mStreamSetNameMap.empty()) {
112        prepareKernelSignature();
113    }
114    for (auto binding : mInternalScalars) {
115        addScalar(binding.type, binding.name);
116    }
117    addScalar(iBuilder->getSizeTy(), LOGICAL_SEGMENT_NO_SCALAR);
118    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
119    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
120}
121
122std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
123    auto saveModule = iBuilder->getModule();
124    auto savePoint = iBuilder->saveIP();
125    auto module = make_unique<Module>(getName() + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
126    iBuilder->setModule(module.get());
127    generateKernel(inputs, outputs);
128    iBuilder->setModule(saveModule);
129    iBuilder->restoreIP(savePoint);
130    return module;
131}
132
133void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
134
135    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
136    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
137        if (LLVM_UNLIKELY(mStreamSetInputBuffers[i] == nullptr)) {
138            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
139                               + " cannot be null when calling generateKernel()");
140        }
141    }
142    if (LLVM_UNLIKELY(mStreamSetInputs.size() != mStreamSetInputBuffers.size())) {
143        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
144                           " input stream sets but generateKernel() was given "
145                           + std::to_string(mStreamSetInputBuffers.size()));
146    }
147
148    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
149    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
150        if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i] == nullptr)) {
151            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
152                               + " cannot be null when calling generateKernel()");
153        }
154    }
155    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != mStreamSetOutputBuffers.size())) {
156        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
157                           + " output stream sets but generateKernel() was given "
158                           + std::to_string(mStreamSetOutputBuffers.size()));
159    }
160
161
162    auto savePoint = iBuilder->saveIP();
163    prepareKernel(); // possibly overridden by the KernelBuilder subtype
164    addKernelDeclarations(iBuilder->getModule());
165    callGenerateInitMethod();
166    callGenerateDoSegmentMethod();
167    // Implement the accumulator get functions
168    for (auto binding : mScalarOutputs) {
169        Function * f = getAccumulatorFunction(binding.name);
170        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
171        Value * self = &*(f->arg_begin());
172        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
173        Value * retVal = iBuilder->CreateLoad(ptr);
174        iBuilder->CreateRet(retVal);
175    }
176    iBuilder->restoreIP(savePoint);
177}
178
179void KernelBuilder::callGenerateDoSegmentMethod() {
180    mCurrentMethod = getDoSegmentFunction();
181    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
182    auto args = mCurrentMethod->arg_begin();
183    mSelf = &*(args++);
184    Value * doFinal = &*(args++);
185    std::vector<Value *> producerPos;
186    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
187        producerPos.push_back(&*(args++));
188    }
189    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
190    iBuilder->CreateRetVoid();
191}
192
193void KernelBuilder::callGenerateInitMethod() {
194    mCurrentMethod = getInitFunction();
195    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
196    Function::arg_iterator args = mCurrentMethod->arg_begin();
197    mSelf = &*(args++);
198    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), mSelf);
199    for (auto binding : mScalarInputs) {
200        Value * param = &*(args++);
201        Value * ptr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
202        iBuilder->CreateStore(param, ptr);
203    }
204    generateInitMethod();
205    iBuilder->CreateRetVoid();
206}
207
208ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
209    const auto f = mKernelMap.find(name);
210    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
211        report_fatal_error(getName() + " does not contain scalar: " + name);
212    }
213    return iBuilder->getInt32(f->second);
214}
215
216unsigned KernelBuilder::getScalarCount() const {
217    return mKernelFields.size();
218}
219
220Value * KernelBuilder::getScalarFieldPtr(Value * instance, Value * index) const {
221    assert ("instance cannot be null!" && instance);
222    return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), index});
223}
224
225Value * KernelBuilder::getScalarFieldPtr(Value * instance, const std::string & fieldName) const {
226    assert ("instance cannot be null!" && instance);
227    return getScalarFieldPtr(instance, getScalarIndex(fieldName));
228}
229
230Value * KernelBuilder::getScalarField(Value * instance, const std::string & fieldName) const {
231    return iBuilder->CreateLoad(getScalarFieldPtr(instance, fieldName));
232}
233
234Value * KernelBuilder::getScalarField(Value * instance, Value * index) const {
235    assert ("instance cannot be null!" && instance);
236    return iBuilder->CreateLoad(getScalarFieldPtr(instance, index));
237}
238
239void KernelBuilder::setScalarField(Value * instance, const std::string & fieldName, Value * value) const {
240    assert ("instance cannot be null!" && instance);
241    iBuilder->CreateStore(value, getScalarFieldPtr(instance, fieldName));
242}
243
244void KernelBuilder::setScalarField(Value * instance, Value * index, Value * value) const {
245    assert ("instance cannot be null!" && instance);
246    iBuilder->CreateStore(value, getScalarFieldPtr(instance, index));
247}
248
249Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name, Value * doFinal) const {
250    assert ("instance cannot be null!" && instance);
251    unsigned ssIdx = getStreamSetIndex(name);
252    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
253        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
254        std::string principalField;
255        if (refSet.empty()) {
256            if (mStreamSetInputs.empty()) {
257                principalField = mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX;
258            } else {
259                principalField = mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
260            }
261        } else {
262            unsigned pfIndex = getStreamSetIndex(refSet);
263            if (mStreamSetInputs.size() > pfIndex && mStreamSetInputs[pfIndex].name == refSet) {
264               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
265            } else {
266               principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
267            }
268        }
269        Value * principalItemsProcessed = getScalarField(instance, principalField);
270        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
271    }
272    return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
273}
274
275llvm::Value * KernelBuilder::getConsumedItemCount(llvm::Value * instance, const std::string & name) const {
276    assert ("instance cannot be null!" && instance);
277    return getScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX);
278}
279
280Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
281    assert ("instance cannot be null!" && instance);
282    unsigned ssIdx = getStreamSetIndex(name);
283    if (mStreamSetInputs[ssIdx].rate.isExact()) {
284        std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
285        if (refSet.empty()) {
286            refSet = mStreamSetInputs[0].name;
287        }
288        Value * principalItemsProcessed = getScalarField(instance, refSet + PROCESSED_ITEM_COUNT_SUFFIX);
289        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
290    }
291    return getScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX);
292}
293
294void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
295    assert ("instance cannot be null!" && instance);
296    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
297}
298
299void KernelBuilder::setConsumedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
300    assert ("instance cannot be null!" && instance);
301    setScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX, value);
302}
303
304void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
305    assert ("instance cannot be null!" && instance);
306    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
307}
308
309void KernelBuilder::reserveBytes(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
310    assert ("instance cannot be null!" && instance);
311    Value * itemCount = getProducedItemCount(instance, name);
312    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
313    buf->reserveBytes(getStreamSetBufferPtr(name), iBuilder->CreateAdd(itemCount, value));
314}
315
316Value * KernelBuilder::getTerminationSignal(Value * instance) const {
317    assert ("instance cannot be null!" && instance);
318    return getScalarField(instance, TERMINATION_SIGNAL);
319}
320
321void KernelBuilder::setTerminationSignal(Value * instance) const {
322    assert ("instance cannot be null!" && instance);
323    setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getInt1(true));
324}
325
326LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * instance) const {
327    assert ("instance cannot be null!" && instance);
328    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
329}
330
331void KernelBuilder::releaseLogicalSegmentNo(Value * instance, Value * newCount) const {
332    assert ("instance cannot be null!" && instance);
333    iBuilder->CreateAtomicStoreRelease(newCount, getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
334}
335
336inline Value * KernelBuilder::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
337    for (const Binding & b : bindings) {
338        if (b.name == name) {
339            const auto divisor = iBuilder->getBitBlockWidth();
340            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
341                return iBuilder->CreateLShr(itemCount, std::log2(divisor));
342            } else {
343                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
344            }
345        }
346    }
347    report_fatal_error("Error: no binding in " + getName() + " for " + name);
348}
349
350Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
351    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
352    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
353    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
354}
355
356Value * KernelBuilder::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
357    return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
358}
359
360Value * KernelBuilder::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
361    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
362    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
363    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
364}
365
366Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
367    return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
368}
369
370llvm::Value * KernelBuilder::getInputStreamSetCount(const std::string & name) const {
371    return getInputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
372}
373
374llvm::Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
375    Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
376    blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
377    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
378    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
379}
380
381Value * KernelBuilder::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
382    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
383    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
384    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
385}
386
387void KernelBuilder::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
388    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
389}
390
391Value * KernelBuilder::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
392    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
393    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
394    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
395}
396
397void KernelBuilder::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
398    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
399}
400
401llvm::Value * KernelBuilder::getOutputStreamSetCount(const std::string & name) const {
402    return getOutputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
403}
404
405Value * KernelBuilder::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
406    return getInputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
407}
408
409Value * KernelBuilder::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
410    return getOutputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
411}
412
413unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
414    const auto f = mStreamSetNameMap.find(name);
415    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
416        report_fatal_error(getName() + " does not contain stream set: " + name);
417    }
418    return f->second;
419}
420
421Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
422    return getScalarField(getSelf(), name + BUFFER_PTR_SUFFIX);
423}
424
425Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
426    for (auto & arg : f->getArgumentList()) {
427        if (arg.getName().equals(name)) {
428            return &arg;
429        }
430    }
431    report_fatal_error(getName() + " does not have parameter " + name);
432}
433
434Value * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
435    return iBuilder->CreateCall(getDoSegmentFunction(), args);
436}
437
438Value * KernelBuilder::createGetAccumulatorCall(Value * self, const std::string & accumName) const {
439    return iBuilder->CreateCall(getAccumulatorFunction(accumName), {self});
440}
441
442BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
443    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
444}
445
446void KernelBuilder::createInstance() {
447    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
448        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
449    }
450    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
451
452    std::vector<Value *> args;
453    args.reserve(mInitialArguments.size() + mStreamSetInputBuffers.size() + mStreamSetOutputBuffers.size() + 1);
454    args.push_back(mKernelInstance);
455    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
456        Value * arg = mInitialArguments[i];
457        if (LLVM_UNLIKELY(arg == nullptr)) {
458            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
459                               + " cannot be null when calling createInstance()");
460        }
461        args.push_back(arg);
462    }
463    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
464        assert (mStreamSetInputBuffers[i]);
465        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
466        if (LLVM_UNLIKELY(arg == nullptr)) {
467            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
468                               + " was not allocated prior to calling createInstance()");
469        }
470        args.push_back(arg);
471    }
472    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
473    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
474        assert (mStreamSetOutputBuffers[i]);
475        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
476        if (LLVM_UNLIKELY(arg == nullptr)) {
477            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
478                               + " was not allocated prior to calling createInstance()");
479        }
480        args.push_back(arg);
481    }
482    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
483    iBuilder->CreateCall(getInitFunction(), args);
484}
485
486//  The default doSegment method dispatches to the doBlock routine for
487//  each block of the given number of blocksToDo, and then updates counts.
488
489void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
490
491    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
492    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
493    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
494    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
495    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
496    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
497
498    Value * baseTarget = nullptr;
499    if (useIndirectBr()) {
500        baseTarget = iBuilder->CreateSelect(doFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
501    }
502
503    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
504    Value * availablePos = producerPos[0];
505    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
506        Value * p = producerPos[i];
507        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
508    }
509
510    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
511    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
512    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
513
514    iBuilder->CreateBr(strideLoopCond);
515
516    iBuilder->SetInsertPoint(strideLoopCond);
517
518    PHINode * branchTarget = nullptr;
519    if (useIndirectBr()) {
520        branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
521        branchTarget->addIncoming(baseTarget, entryBlock);
522    }
523
524    PHINode * stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
525    stridesRemaining->addIncoming(stridesToDo, entryBlock);
526    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
527    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
528    Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
529    iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
530
531    iBuilder->SetInsertPoint(mStrideLoopBody);
532
533    if (useIndirectBr()) {
534        mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
535        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
536    }
537
538    /// GENERATE DO BLOCK METHOD
539
540    writeDoBlockMethod();
541
542    /// UPDATE PROCESSED COUNTS
543
544    processed = getProcessedItemCount(mStreamSetInputs[0].name);
545    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
546    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
547
548    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
549
550    BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
551    if (useIndirectBr()) {
552        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
553    }
554    iBuilder->CreateBr(strideLoopCond);
555
556    stridesDone->moveAfter(bodyEnd);
557
558    iBuilder->SetInsertPoint(stridesDone);
559
560    // Now conditionally perform the final block processing depending on the doFinal parameter.
561    if (useIndirectBr()) {
562        mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
563        mStrideLoopBranch->addDestination(doFinalBlock);
564        mStrideLoopBranch->addDestination(segmentDone);
565    } else {
566        iBuilder->CreateUnlikelyCondBr(doFinal, doFinalBlock, segmentDone);
567    }
568
569    doFinalBlock->moveAfter(stridesDone);
570
571    iBuilder->SetInsertPoint(doFinalBlock);
572
573    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
574    writeFinalBlockMethod(remainingItems);
575
576    itemsDone = producerPos[0];
577    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
578    setTerminationSignal();
579    iBuilder->CreateBr(segmentDone);
580
581    segmentDone->moveAfter(iBuilder->GetInsertBlock());
582
583    iBuilder->SetInsertPoint(segmentDone);
584
585    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
586    if (useIndirectBr()) {
587        MDBuilder mdb(iBuilder->getContext());
588        const auto destinations = mStrideLoopBranch->getNumDestinations();
589        uint32_t weights[destinations];
590        for (unsigned i = 0; i < destinations; ++i) {
591            weights[i] = (mStrideLoopBranch->getDestination(i) == segmentDone) ? 100 : 1;
592        }
593        ArrayRef<uint32_t> bw(weights, destinations);
594        mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
595    }
596
597}
598
599inline void BlockOrientedKernel::writeDoBlockMethod() {
600
601    Value * const self = mSelf;
602    Function * const cp = mCurrentMethod;
603    auto ip = iBuilder->saveIP();
604
605    /// Check if the do block method is called and create the function if necessary   
606    if (!useIndirectBr()) {
607        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
608        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
609        mCurrentMethod->setCallingConv(CallingConv::C);
610        mCurrentMethod->setDoesNotThrow();
611        mCurrentMethod->setDoesNotCapture(1);
612        auto args = mCurrentMethod->arg_begin();
613        mCurrentMethod = mCurrentMethod;
614        mSelf = &*args;
615        mSelf->setName("self");
616        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
617    }
618
619    std::vector<Value *> priorProduced;
620    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
621        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
622            priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
623        }
624    }
625
626    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
627
628    unsigned priorIdx = 0;
629    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
630        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
631        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
632            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
633            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
634            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
635            Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
636            Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
637            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
638            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(instance, priorBlock);
639            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
640            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
641            iBuilder->CreateCondBr(wraparound, copyBack, done);
642            iBuilder->SetInsertPoint(copyBack);
643            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
644            cb->createCopyBack(instance, copyItems);
645            iBuilder->CreateBr(done);
646            iBuilder->SetInsertPoint(done);
647            priorIdx++;
648        }
649        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
650            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
651            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
652            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
653            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
654            Value * accessible = cb->getLinearlyAccessibleItems(instance, priorProduced[priorIdx]);
655            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
656            iBuilder->CreateCondBr(wraparound, copyBack, done);
657            iBuilder->SetInsertPoint(copyBack);
658            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
659            cb->createCopyBack(instance, copyItems);
660            iBuilder->CreateBr(done);
661            iBuilder->SetInsertPoint(done);
662            priorIdx++;
663        }
664    }
665
666    /// Call the do block method if necessary then restore the current function state to the do segment method
667    if (!useIndirectBr()) {
668        iBuilder->CreateRetVoid();
669        mDoBlockMethod = mCurrentMethod;
670        iBuilder->restoreIP(ip);
671        iBuilder->CreateCall(mCurrentMethod, self);
672        mSelf = self;
673        mCurrentMethod = cp;
674    }
675
676}
677
678inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
679
680    Value * const self = mSelf;
681    Function * const cp = mCurrentMethod;
682    Value * const remainingItemCount = remainingItems;
683    auto ip = iBuilder->saveIP();
684
685    if (!useIndirectBr()) {
686        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
687        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
688        mCurrentMethod->setCallingConv(CallingConv::C);
689        mCurrentMethod->setDoesNotThrow();
690        mCurrentMethod->setDoesNotCapture(1);
691        auto args = mCurrentMethod->arg_begin();
692        mSelf = &*args;
693        mSelf->setName("self");
694        remainingItems = &*(++args);
695        remainingItems->setName("remainingItems");
696        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
697    }
698
699    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
700
701    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
702
703    if (!useIndirectBr()) {
704        iBuilder->CreateRetVoid();       
705        iBuilder->restoreIP(ip);
706        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
707        mCurrentMethod = cp;
708        mSelf = self;
709    }
710
711}
712
713//  The default finalBlock method simply dispatches to the doBlock routine.
714void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
715    CreateDoBlockMethodCall();
716}
717
718void BlockOrientedKernel::CreateDoBlockMethodCall() {
719    if (useIndirectBr()) {
720        BasicBlock * bb = CreateBasicBlock("resume");
721        mStrideLoopBranch->addDestination(bb);
722        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
723        iBuilder->CreateBr(mStrideLoopBody);
724        bb->moveAfter(iBuilder->GetInsertBlock());
725        iBuilder->SetInsertPoint(bb);
726    } else {
727        iBuilder->CreateCall(mDoBlockMethod, mSelf);
728    }
729}
730
731// CONSTRUCTOR
732
733BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
734                                                           std::string && kernelName,
735                                                           std::vector<Binding> && stream_inputs,
736                                                           std::vector<Binding> && stream_outputs,
737                                                           std::vector<Binding> && scalar_parameters,
738                                                           std::vector<Binding> && scalar_outputs,
739                                                           std::vector<Binding> && internal_scalars)
740: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
741, mDoBlockMethod(nullptr)
742, mStrideLoopBody(nullptr)
743, mStrideLoopBranch(nullptr)
744, mStrideLoopTarget(nullptr) {
745
746}
747
748
749// CONSTRUCTOR
750KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
751                             std::string && kernelName,
752                             std::vector<Binding> && stream_inputs,
753                             std::vector<Binding> && stream_outputs,
754                             std::vector<Binding> && scalar_parameters,
755                             std::vector<Binding> && scalar_outputs,
756                             std::vector<Binding> && internal_scalars)
757: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
758, mSelf(nullptr)
759, mCurrentMethod(nullptr)
760, mNoTerminateAttribute(false) {
761
762}
763
764KernelBuilder::~KernelBuilder() { }
765
766// CONSTRUCTOR
767SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
768                                             std::string && kernelName,
769                                             std::vector<Binding> && stream_inputs,
770                                             std::vector<Binding> && stream_outputs,
771                                             std::vector<Binding> && scalar_parameters,
772                                             std::vector<Binding> && scalar_outputs,
773                                             std::vector<Binding> && internal_scalars)
774: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
775
776}
Note: See TracBrowser for help on using the repository browser.