source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5361

Last change on this file since 5361 was 5361, checked in by nmedfort, 2 years ago

Work on non-carry collapsing mode.

File size: 32.6 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain.h>
8#include <kernels/streamset.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Function.h>
11#include <llvm/IR/Instructions.h>
12#include <llvm/IR/MDBuilder.h>
13#include <llvm/IR/Module.h>
14#include <llvm/Support/raw_ostream.h>
15#include <llvm/Transforms/Utils/Local.h>
16
// Appended to the kernel name to form the name of its separately emitted do-block function.
static const char * const DO_BLOCK_SUFFIX = "_DoBlock";

// Appended to the kernel name to form the name of its separately emitted final-block function.
static const char * const FINAL_BLOCK_SUFFIX = "_FinalBlock";

// Name of the size-typed kernel state field that holds the logical segment number.
static const char * const LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";

// Appended to an input stream set name to form the name of its processed-item-count field.
static const char * const PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";

// Appended to an output stream set name to form the name of its produced-item-count field.
static const char * const PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";

// Name of the i1-typed kernel state field that records termination.
static const char * const TERMINATION_SIGNAL = "terminationSignal";

// Appended to a stream set name to form the name of the scalar holding its buffer pointer.
static const char * const BUFFER_PTR_SUFFIX = "_bufferPtr";

// Not referenced in the visible part of this file.
static const char * const BLOCK_MASK_SUFFIX = "_blkMask";
32
33using namespace llvm;
34using namespace kernel;
35using namespace parabix;
36
37unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
38    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
39        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
40    }
41    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
42        report_fatal_error(getName() + " already contains scalar field " + name);
43    }
44    const auto index = mKernelFields.size();
45    mKernelMap.emplace(name, index);
46    mKernelFields.push_back(type);
47    return index;
48}
49
50unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
51    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
52        report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
53    }
54    const auto index = mKernelFields.size();
55    mKernelFields.push_back(type);
56    return index;
57}
58
59void KernelBuilder::prepareKernelSignature() {
60    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
61        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, i);
62    }
63    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
64        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, i);
65    }
66}
67   
68void KernelBuilder::prepareKernel() {
69    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
70        report_fatal_error("Cannot prepare kernel after kernel state finalized");
71    }
72    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
73        std::string tmp;
74        raw_string_ostream out(tmp);
75        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
76            << mStreamSetInputs.size() << " input stream sets.";
77        report_fatal_error(out.str());
78    }
79    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
80        std::string tmp;
81        raw_string_ostream out(tmp);
82        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
83            << mStreamSetOutputs.size() << " output stream sets.";
84        report_fatal_error(out.str());
85    }
86    const auto blockSize = iBuilder->getBitBlockWidth();
87    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
88        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
89            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
90        }
91        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
92        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
93            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
94        }
95       
96    }
97    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
98        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
99        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
100            addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
101        }
102    }
103    for (const auto binding : mScalarInputs) {
104        addScalar(binding.type, binding.name);
105    }
106    for (const auto binding : mScalarOutputs) {
107        addScalar(binding.type, binding.name);
108    }
109    if (mStreamSetNameMap.empty()) {
110        prepareKernelSignature();
111    }
112    for (auto binding : mInternalScalars) {
113        addScalar(binding.type, binding.name);
114    }
115    addScalar(iBuilder->getSizeTy(), LOGICAL_SEGMENT_NO_SCALAR);
116    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
117    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
118}
119
120std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
121    auto saveModule = iBuilder->getModule();
122    auto savePoint = iBuilder->saveIP();
123    auto module = make_unique<Module>(getName() + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
124    iBuilder->setModule(module.get());
125    generateKernel(inputs, outputs);
126    iBuilder->setModule(saveModule);
127    iBuilder->restoreIP(savePoint);
128    return module;
129}
130
131void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
132
133    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
134    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
135        if (LLVM_UNLIKELY(mStreamSetInputBuffers[i] == nullptr)) {
136            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
137                               + " cannot be null when calling generateKernel()");
138        }
139    }
140    if (LLVM_UNLIKELY(mStreamSetInputs.size() != mStreamSetInputBuffers.size())) {
141        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
142                           " input stream sets but generateKernel() was given "
143                           + std::to_string(mStreamSetInputBuffers.size()));
144    }
145
146    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
147    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
148        if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i] == nullptr)) {
149            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
150                               + " cannot be null when calling generateKernel()");
151        }
152    }
153    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != mStreamSetOutputBuffers.size())) {
154        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
155                           + " output stream sets but generateKernel() was given "
156                           + std::to_string(mStreamSetOutputBuffers.size()));
157    }
158
159
160    auto savePoint = iBuilder->saveIP();
161    prepareKernel(); // possibly overridden by the KernelBuilder subtype
162    addKernelDeclarations(iBuilder->getModule());
163    callGenerateInitMethod();
164    callGenerateDoSegmentMethod();
165    // Implement the accumulator get functions
166    for (auto binding : mScalarOutputs) {
167        Function * f = getAccumulatorFunction(binding.name);
168        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
169        Value * self = &*(f->arg_begin());
170        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
171        Value * retVal = iBuilder->CreateLoad(ptr);
172        iBuilder->CreateRet(retVal);
173    }
174    iBuilder->restoreIP(savePoint);
175}
176
177void KernelBuilder::callGenerateDoSegmentMethod() {
178    mCurrentMethod = getDoSegmentFunction();
179    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
180    auto args = mCurrentMethod->arg_begin();
181    mSelf = &*(args++);
182    Value * doFinal = &*(args++);
183    std::vector<Value *> producerPos;
184    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
185        producerPos.push_back(&*(args++));
186    }
187    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
188    iBuilder->CreateRetVoid();
189}
190
191void KernelBuilder::callGenerateInitMethod() {
192    mCurrentMethod = getInitFunction();
193    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
194    Function::arg_iterator args = mCurrentMethod->arg_begin();
195    mSelf = &*(args++);
196    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), mSelf);
197    for (auto binding : mScalarInputs) {
198        Value * param = &*(args++);
199        Value * ptr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
200        iBuilder->CreateStore(param, ptr);
201    }
202    generateInitMethod();
203    iBuilder->CreateRetVoid();
204}
205
206ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
207    const auto f = mKernelMap.find(name);
208    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
209        report_fatal_error(getName() + " does not contain scalar: " + name);
210    }
211    return iBuilder->getInt32(f->second);
212}
213
214unsigned KernelBuilder::getScalarCount() const {
215    return mKernelFields.size();
216}
217
218Value * KernelBuilder::getScalarFieldPtr(Value * instance, Value * index) const {
219    return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), index});
220}
221
222Value * KernelBuilder::getScalarFieldPtr(Value * instance, const std::string & fieldName) const {
223    return getScalarFieldPtr(instance, getScalarIndex(fieldName));
224}
225
226Value * KernelBuilder::getScalarField(Value * instance, const std::string & fieldName) const {
227    return iBuilder->CreateLoad(getScalarFieldPtr(instance, fieldName));
228}
229
230Value * KernelBuilder::getScalarField(Value * instance, Value * index) const {
231    return iBuilder->CreateLoad(getScalarFieldPtr(instance, index));
232}
233
234void KernelBuilder::setScalarField(Value * instance, const std::string & fieldName, Value * value) const {
235    iBuilder->CreateStore(value, getScalarFieldPtr(instance, fieldName));
236}
237
238void KernelBuilder::setScalarField(Value * instance, Value * index, Value * value) const {
239    iBuilder->CreateStore(value, getScalarFieldPtr(instance, index));
240}
241
242Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
243    unsigned ssIdx = getStreamSetIndex(name);
244    if (mStreamSetInputs[ssIdx].rate.isExact()) {
245        Value * principalItemsProcessed = getScalarField(instance, mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX);
246        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
247    }
248    return getScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX);
249}
250
251Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name) const {
252    unsigned ssIdx = getStreamSetIndex(name);
253    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
254        std::string principalField = mStreamSetInputs.empty() ? mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX : mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
255        Value * principalItemsProcessed = getScalarField(instance, principalField);
256        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
257    }
258    return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
259}
260
261void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
262    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
263}
264
265void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
266    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
267}
268
269Value * KernelBuilder::getTerminationSignal(Value * instance) const {
270    return getScalarField(instance, TERMINATION_SIGNAL);
271}
272
273void KernelBuilder::setTerminationSignal(Value * instance) const {
274    setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getInt1(true));
275}
276
277LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * instance) const {
278    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
279}
280
281void KernelBuilder::releaseLogicalSegmentNo(Value * instance, Value * newCount) const {
282    iBuilder->CreateAtomicStoreRelease(newCount, getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
283}
284
285inline Value * KernelBuilder::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
286    for (const Binding & b : bindings) {
287        if (b.name == name) {
288            const auto divisor = iBuilder->getBitBlockWidth();
289            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
290                return iBuilder->CreateLShr(itemCount, std::log2(divisor));
291            } else {
292                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
293            }
294        }
295    }
296    report_fatal_error("Error: no binding in " + getName() + " for " + name);
297}
298
299Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
300    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
301    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
302    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
303}
304
305Value * KernelBuilder::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
306    return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
307}
308
309Value * KernelBuilder::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
310    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
311    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
312    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
313}
314
315Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
316    return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
317}
318
319llvm::Value * KernelBuilder::getInputStreamSetCount(const std::string & name) const {
320    return getInputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
321}
322
323llvm::Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
324    Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
325    blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
326    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
327    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
328}
329
330Value * KernelBuilder::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
331    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
332    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
333    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
334}
335
336void KernelBuilder::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
337    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
338}
339
340Value * KernelBuilder::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
341    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
342    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
343    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
344}
345
346void KernelBuilder::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
347    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
348}
349
350llvm::Value * KernelBuilder::getOutputStreamSetCount(const std::string & name) const {
351    return getOutputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
352}
353
354Value * KernelBuilder::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
355    return getInputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
356}
357
358Value * KernelBuilder::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
359    return getOutputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
360}
361
362unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
363    const auto f = mStreamSetNameMap.find(name);
364    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
365        report_fatal_error(getName() + " does not contain stream set: " + name);
366    }
367    return f->second;
368}
369
370Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
371    return getScalarField(getSelf(), name + BUFFER_PTR_SUFFIX);
372}
373
374Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
375    for (auto & arg : f->getArgumentList()) {
376        if (arg.getName().equals(name)) {
377            return &arg;
378        }
379    }
380    report_fatal_error(getName() + " does not have parameter " + name);
381}
382
383Value * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
384    return iBuilder->CreateCall(getDoSegmentFunction(), args);
385}
386
387Value * KernelBuilder::createGetAccumulatorCall(Value * self, const std::string & accumName) const {
388    return iBuilder->CreateCall(getAccumulatorFunction(accumName), {self});
389}
390
391BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
392    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
393}
394
395void KernelBuilder::createInstance() {
396    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
397        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
398    }
399    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
400
401    std::vector<Value *> args;
402    args.reserve(mInitialArguments.size() + mStreamSetInputBuffers.size() + mStreamSetOutputBuffers.size() + 1);
403    args.push_back(mKernelInstance);
404    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
405        Value * arg = mInitialArguments[i];
406        if (LLVM_UNLIKELY(arg == nullptr)) {
407            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
408                               + " cannot be null when calling createInstance()");
409        }
410        args.push_back(arg);
411    }
412    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
413        assert (mStreamSetInputBuffers[i]);
414        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
415        if (LLVM_UNLIKELY(arg == nullptr)) {
416            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
417                               + " was not allocated prior to calling createInstance()");
418        }
419        args.push_back(arg);
420    }
421    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
422    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
423        assert (mStreamSetOutputBuffers[i]);
424        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
425        if (LLVM_UNLIKELY(arg == nullptr)) {
426            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
427                               + " was not allocated prior to calling createInstance()");
428        }
429        args.push_back(arg);
430    }
431    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
432    iBuilder->CreateCall(getInitFunction(), args);
433}
434
435//  The default doSegment method dispatches to the doBlock routine for
436//  each block of the given number of blocksToDo, and then updates counts.
437
438void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
439
440    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
441    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
442    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
443    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
444    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
445    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
446
447    Value * baseTarget = nullptr;
448    if (useIndirectBr()) {
449        baseTarget = iBuilder->CreateSelect(doFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
450    }
451
452    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
453    Value * availablePos = producerPos[0];
454    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
455        Value * p = producerPos[i];
456        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
457    }
458
459    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
460    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
461    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
462
463    iBuilder->CreateBr(strideLoopCond);
464
465    iBuilder->SetInsertPoint(strideLoopCond);
466
467    PHINode * branchTarget = nullptr;
468    if (useIndirectBr()) {
469        branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
470        branchTarget->addIncoming(baseTarget, entryBlock);
471    }
472
473    PHINode * stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
474    stridesRemaining->addIncoming(stridesToDo, entryBlock);
475    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
476    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
477    Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
478    iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
479
480    iBuilder->SetInsertPoint(mStrideLoopBody);
481
482    if (useIndirectBr()) {
483        mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
484        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
485    }
486
487    /// GENERATE DO BLOCK METHOD
488
489    writeDoBlockMethod();
490
491    /// UPDATE PROCESSED COUNTS
492
493    processed = getProcessedItemCount(mStreamSetInputs[0].name);
494    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
495    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
496
497    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
498
499    BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
500    if (useIndirectBr()) {
501        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
502    }
503    iBuilder->CreateBr(strideLoopCond);
504
505    stridesDone->moveAfter(bodyEnd);
506
507    iBuilder->SetInsertPoint(stridesDone);
508
509    // Now conditionally perform the final block processing depending on the doFinal parameter.
510    if (useIndirectBr()) {
511        mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
512        mStrideLoopBranch->addDestination(doFinalBlock);
513        mStrideLoopBranch->addDestination(segmentDone);
514    } else {
515        iBuilder->CreateUnlikelyCondBr(doFinal, doFinalBlock, segmentDone);
516    }
517
518    doFinalBlock->moveAfter(stridesDone);
519
520    iBuilder->SetInsertPoint(doFinalBlock);
521
522    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
523    writeFinalBlockMethod(remainingItems);
524
525    itemsDone = producerPos[0];
526    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
527    setTerminationSignal();
528    iBuilder->CreateBr(segmentDone);
529
530    segmentDone->moveAfter(iBuilder->GetInsertBlock());
531
532    iBuilder->SetInsertPoint(segmentDone);
533
534    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
535    if (useIndirectBr()) {
536        MDBuilder mdb(iBuilder->getContext());
537        const auto destinations = mStrideLoopBranch->getNumDestinations();
538        uint32_t weights[destinations];
539        for (unsigned i = 0; i < destinations; ++i) {
540            weights[i] = (mStrideLoopBranch->getDestination(i) == segmentDone) ? 100 : 1;
541        }
542        ArrayRef<uint32_t> bw(weights, destinations);
543        mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
544    }
545
546}
547
548inline void BlockOrientedKernel::writeDoBlockMethod() {
549
550    Value * const self = mSelf;
551    Function * const cp = mCurrentMethod;
552    auto ip = iBuilder->saveIP();
553
554    /// Check if the do block method is called and create the function if necessary   
555    if (!useIndirectBr()) {
556        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
557        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
558        mCurrentMethod->setCallingConv(CallingConv::C);
559        mCurrentMethod->setDoesNotThrow();
560        mCurrentMethod->setDoesNotCapture(1);
561        auto args = mCurrentMethod->arg_begin();
562        mCurrentMethod = mCurrentMethod;
563        mSelf = &*args;
564        mSelf->setName("self");
565        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
566    }
567
568    std::vector<Value *> priorProduced;
569    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
570        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
571            priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
572        }
573    }
574
575    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
576
577    unsigned priorIdx = 0;
578    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
579        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
580        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
581            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
582            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
583            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
584            Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
585            Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
586            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(priorBlock);
587            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
588            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
589            iBuilder->CreateCondBr(wraparound, copyBack, done);
590            iBuilder->SetInsertPoint(copyBack);
591            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
592            cb->createCopyBack(getStreamSetBufferPtr(mStreamSetOutputs[i].name), copyItems);
593            iBuilder->CreateBr(done);
594            iBuilder->SetInsertPoint(done);
595            priorIdx++;
596        }
597        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
598            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
599            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
600            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
601            Value * accessible = cb->getLinearlyAccessibleItems(priorProduced[priorIdx]);
602            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
603            iBuilder->CreateCondBr(wraparound, copyBack, done);
604            iBuilder->SetInsertPoint(copyBack);
605            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
606            cb->createCopyBack(getStreamSetBufferPtr(mStreamSetOutputs[i].name), copyItems);
607            iBuilder->CreateBr(done);
608            iBuilder->SetInsertPoint(done);
609            priorIdx++;
610        }
611    }
612
613    /// Call the do block method if necessary then restore the current function state to the do segement method
614    if (!useIndirectBr()) {
615        iBuilder->CreateRetVoid();
616        mDoBlockMethod = mCurrentMethod;
617        iBuilder->restoreIP(ip);
618        iBuilder->CreateCall(mCurrentMethod, self);
619        mSelf = self;
620        mCurrentMethod = cp;
621    }
622
623}
624
625inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
626
627    Value * const self = mSelf;
628    Function * const cp = mCurrentMethod;
629    Value * const remainingItemCount = remainingItems;
630    auto ip = iBuilder->saveIP();
631
632    if (!useIndirectBr()) {
633        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
634        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
635        mCurrentMethod->setCallingConv(CallingConv::C);
636        mCurrentMethod->setDoesNotThrow();
637        mCurrentMethod->setDoesNotCapture(1);
638        auto args = mCurrentMethod->arg_begin();
639        mSelf = &*args;
640        mSelf->setName("self");
641        remainingItems = &*(++args);
642        remainingItems->setName("remainingItems");
643        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
644    }
645
646    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
647
648    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
649
650    if (!useIndirectBr()) {
651        iBuilder->CreateRetVoid();       
652        iBuilder->restoreIP(ip);
653        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
654        mCurrentMethod = cp;
655        mSelf = self;
656    }
657
658}
659
660//  The default finalBlock method simply dispatches to the doBlock routine.
661void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
662    CreateDoBlockMethodCall();
663}
664
665void BlockOrientedKernel::CreateDoBlockMethodCall() {
666    if (useIndirectBr()) {
667        BasicBlock * bb = CreateBasicBlock("resume");
668        mStrideLoopBranch->addDestination(bb);
669        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
670        iBuilder->CreateBr(mStrideLoopBody);
671        bb->moveAfter(iBuilder->GetInsertBlock());
672        iBuilder->SetInsertPoint(bb);
673    } else {
674        iBuilder->CreateCall(mDoBlockMethod, mSelf);
675    }
676}
677
678// CONSTRUCTOR
679
680BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
681                                                           std::string && kernelName,
682                                                           std::vector<Binding> && stream_inputs,
683                                                           std::vector<Binding> && stream_outputs,
684                                                           std::vector<Binding> && scalar_parameters,
685                                                           std::vector<Binding> && scalar_outputs,
686                                                           std::vector<Binding> && internal_scalars)
687: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
688, mDoBlockMethod(nullptr)
689, mStrideLoopBody(nullptr)
690, mStrideLoopBranch(nullptr)
691, mStrideLoopTarget(nullptr) {
692
693}
694
695
696// CONSTRUCTOR
697KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
698                             std::string && kernelName,
699                             std::vector<Binding> && stream_inputs,
700                             std::vector<Binding> && stream_outputs,
701                             std::vector<Binding> && scalar_parameters,
702                             std::vector<Binding> && scalar_outputs,
703                             std::vector<Binding> && internal_scalars)
704: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
705, mSelf(nullptr)
706, mCurrentMethod(nullptr)
707, mNoTerminateAttribute(false) {
708
709}
710
711KernelBuilder::~KernelBuilder() { }
712
713// CONSTRUCTOR
714SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
715                                             std::string && kernelName,
716                                             std::vector<Binding> && stream_inputs,
717                                             std::vector<Binding> && stream_outputs,
718                                             std::vector<Binding> && scalar_parameters,
719                                             std::vector<Binding> && scalar_outputs,
720                                             std::vector<Binding> && internal_scalars)
721: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
722
723}
Note: See TracBrowser for help on using the repository browser.