source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5371

Last change on this file since 5371 was 5370, checked in by xuedongx, 3 years ago

Add1 processing rate; pablo Count only up to EOFbit

File size: 33.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain.h>
8#include <kernels/streamset.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Function.h>
11#include <llvm/IR/Instructions.h>
12#include <llvm/IR/MDBuilder.h>
13#include <llvm/IR/Module.h>
14#include <llvm/Support/raw_ostream.h>
15#include <llvm/Transforms/Utils/Local.h>
16
// Appended to the kernel name to form the name of the generated do-block function.
static const char * const DO_BLOCK_SUFFIX = "_DoBlock";

// Appended to the kernel name to form the name of the generated final-block function.
static const char * const FINAL_BLOCK_SUFFIX = "_FinalBlock";

// Name of the kernel-state scalar holding the logical segment number.
static const char * const LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";

// Appended to an input stream set name to form its processed-item-count scalar name.
static const char * const PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";

// Appended to an output stream set name to form its produced-item-count scalar name.
static const char * const PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";

// Name of the kernel-state scalar holding the termination flag.
static const char * const TERMINATION_SIGNAL = "terminationSignal";

// Appended to a stream set name to form the scalar holding its buffer pointer.
static const char * const BUFFER_PTR_SUFFIX = "_bufferPtr";

// NOTE(review): not referenced in the visible part of this file; presumably names a block-mask scalar.
static const char * const BLOCK_MASK_SUFFIX = "_blkMask";
32
33using namespace llvm;
34using namespace kernel;
35using namespace parabix;
36
37unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
38    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
39        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
40    }
41    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
42        report_fatal_error(getName() + " already contains scalar field " + name);
43    }
44    const auto index = mKernelFields.size();
45    mKernelMap.emplace(name, index);
46    mKernelFields.push_back(type);
47    return index;
48}
49
50unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
51    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
52        report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
53    }
54    const auto index = mKernelFields.size();
55    mKernelFields.push_back(type);
56    return index;
57}
58
59void KernelBuilder::prepareKernelSignature() {
60    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
61        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, i);
62    }
63    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
64        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, i);
65    }
66}
67   
68void KernelBuilder::prepareKernel() {
69    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
70        report_fatal_error("Cannot prepare kernel after kernel state finalized");
71    }
72    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
73        std::string tmp;
74        raw_string_ostream out(tmp);
75        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
76            << mStreamSetInputs.size() << " input stream sets.";
77        report_fatal_error(out.str());
78    }
79    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
80        std::string tmp;
81        raw_string_ostream out(tmp);
82        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
83            << mStreamSetOutputs.size() << " output stream sets.";
84        report_fatal_error(out.str());
85    }
86    const auto blockSize = iBuilder->getBitBlockWidth();
87    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
88        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
89            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
90        }
91        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
92        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
93            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
94        }
95       
96    }
97    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
98        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
99        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
100            addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
101        }
102    }
103    for (const auto binding : mScalarInputs) {
104        addScalar(binding.type, binding.name);
105    }
106    for (const auto binding : mScalarOutputs) {
107        addScalar(binding.type, binding.name);
108    }
109    if (mStreamSetNameMap.empty()) {
110        prepareKernelSignature();
111    }
112    for (auto binding : mInternalScalars) {
113        addScalar(binding.type, binding.name);
114    }
115    addScalar(iBuilder->getSizeTy(), LOGICAL_SEGMENT_NO_SCALAR);
116    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
117    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
118}
119
120std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
121    auto saveModule = iBuilder->getModule();
122    auto savePoint = iBuilder->saveIP();
123    auto module = make_unique<Module>(getName() + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
124    iBuilder->setModule(module.get());
125    generateKernel(inputs, outputs);
126    iBuilder->setModule(saveModule);
127    iBuilder->restoreIP(savePoint);
128    return module;
129}
130
131void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
132
133    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
134    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
135        if (LLVM_UNLIKELY(mStreamSetInputBuffers[i] == nullptr)) {
136            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
137                               + " cannot be null when calling generateKernel()");
138        }
139    }
140    if (LLVM_UNLIKELY(mStreamSetInputs.size() != mStreamSetInputBuffers.size())) {
141        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
142                           " input stream sets but generateKernel() was given "
143                           + std::to_string(mStreamSetInputBuffers.size()));
144    }
145
146    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
147    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
148        if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i] == nullptr)) {
149            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
150                               + " cannot be null when calling generateKernel()");
151        }
152    }
153    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != mStreamSetOutputBuffers.size())) {
154        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
155                           + " output stream sets but generateKernel() was given "
156                           + std::to_string(mStreamSetOutputBuffers.size()));
157    }
158
159
160    auto savePoint = iBuilder->saveIP();
161    prepareKernel(); // possibly overridden by the KernelBuilder subtype
162    addKernelDeclarations(iBuilder->getModule());
163    callGenerateInitMethod();
164    callGenerateDoSegmentMethod();
165    // Implement the accumulator get functions
166    for (auto binding : mScalarOutputs) {
167        Function * f = getAccumulatorFunction(binding.name);
168        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
169        Value * self = &*(f->arg_begin());
170        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
171        Value * retVal = iBuilder->CreateLoad(ptr);
172        iBuilder->CreateRet(retVal);
173    }
174    iBuilder->restoreIP(savePoint);
175}
176
177void KernelBuilder::callGenerateDoSegmentMethod() {
178    mCurrentMethod = getDoSegmentFunction();
179    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
180    auto args = mCurrentMethod->arg_begin();
181    mSelf = &*(args++);
182    Value * doFinal = &*(args++);
183    std::vector<Value *> producerPos;
184    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
185        producerPos.push_back(&*(args++));
186    }
187    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
188    iBuilder->CreateRetVoid();
189}
190
191void KernelBuilder::callGenerateInitMethod() {
192    mCurrentMethod = getInitFunction();
193    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
194    Function::arg_iterator args = mCurrentMethod->arg_begin();
195    mSelf = &*(args++);
196    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), mSelf);
197    for (auto binding : mScalarInputs) {
198        Value * param = &*(args++);
199        Value * ptr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
200        iBuilder->CreateStore(param, ptr);
201    }
202    generateInitMethod();
203    iBuilder->CreateRetVoid();
204}
205
206ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
207    const auto f = mKernelMap.find(name);
208    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
209        report_fatal_error(getName() + " does not contain scalar: " + name);
210    }
211    return iBuilder->getInt32(f->second);
212}
213
214unsigned KernelBuilder::getScalarCount() const {
215    return mKernelFields.size();
216}
217
218Value * KernelBuilder::getScalarFieldPtr(Value * instance, Value * index) const {
219    return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), index});
220}
221
222Value * KernelBuilder::getScalarFieldPtr(Value * instance, const std::string & fieldName) const {
223    return getScalarFieldPtr(instance, getScalarIndex(fieldName));
224}
225
226Value * KernelBuilder::getScalarField(Value * instance, const std::string & fieldName) const {
227    return iBuilder->CreateLoad(getScalarFieldPtr(instance, fieldName));
228}
229
230Value * KernelBuilder::getScalarField(Value * instance, Value * index) const {
231    return iBuilder->CreateLoad(getScalarFieldPtr(instance, index));
232}
233
234void KernelBuilder::setScalarField(Value * instance, const std::string & fieldName, Value * value) const {
235    iBuilder->CreateStore(value, getScalarFieldPtr(instance, fieldName));
236}
237
238void KernelBuilder::setScalarField(Value * instance, Value * index, Value * value) const {
239    iBuilder->CreateStore(value, getScalarFieldPtr(instance, index));
240}
241
242Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
243    unsigned ssIdx = getStreamSetIndex(name);
244    if (mStreamSetInputs[ssIdx].rate.isExact()) {
245        Value * principalItemsProcessed = getScalarField(instance, mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX);
246        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
247    }
248    return getScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX);
249}
250
251Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name, Value * doFinal) const {
252    unsigned ssIdx = getStreamSetIndex(name);
253    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
254        std::string principalField = mStreamSetInputs.empty() ? mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX : mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
255        Value * principalItemsProcessed = getScalarField(instance, principalField);
256        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
257    }
258    return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
259}
260
261Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name) const {
262    unsigned ssIdx = getStreamSetIndex(name);
263    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
264        std::string principalField = mStreamSetInputs.empty() ? mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX : mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
265        Value * principalItemsProcessed = getScalarField(instance, principalField);
266        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
267    }
268    return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
269}
270
271void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
272    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
273}
274
275void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
276    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
277}
278
279Value * KernelBuilder::getTerminationSignal(Value * instance) const {
280    return getScalarField(instance, TERMINATION_SIGNAL);
281}
282
283void KernelBuilder::setTerminationSignal(Value * instance) const {
284    setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getInt1(true));
285}
286
287LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * instance) const {
288    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
289}
290
291void KernelBuilder::releaseLogicalSegmentNo(Value * instance, Value * newCount) const {
292    iBuilder->CreateAtomicStoreRelease(newCount, getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
293}
294
295inline Value * KernelBuilder::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
296    for (const Binding & b : bindings) {
297        if (b.name == name) {
298            const auto divisor = iBuilder->getBitBlockWidth();
299            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
300                return iBuilder->CreateLShr(itemCount, std::log2(divisor));
301            } else {
302                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
303            }
304        }
305    }
306    report_fatal_error("Error: no binding in " + getName() + " for " + name);
307}
308
309Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
310    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
311    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
312    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
313}
314
315Value * KernelBuilder::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
316    return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
317}
318
319Value * KernelBuilder::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
320    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
321    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
322    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
323}
324
325Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
326    return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
327}
328
329llvm::Value * KernelBuilder::getInputStreamSetCount(const std::string & name) const {
330    return getInputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
331}
332
333llvm::Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
334    Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
335    blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
336    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
337    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
338}
339
340Value * KernelBuilder::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
341    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
342    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
343    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
344}
345
346void KernelBuilder::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
347    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
348}
349
350Value * KernelBuilder::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
351    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
352    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
353    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
354}
355
356void KernelBuilder::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
357    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
358}
359
360llvm::Value * KernelBuilder::getOutputStreamSetCount(const std::string & name) const {
361    return getOutputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
362}
363
364Value * KernelBuilder::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
365    return getInputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
366}
367
368Value * KernelBuilder::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
369    return getOutputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
370}
371
372unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
373    const auto f = mStreamSetNameMap.find(name);
374    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
375        report_fatal_error(getName() + " does not contain stream set: " + name);
376    }
377    return f->second;
378}
379
380Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
381    return getScalarField(getSelf(), name + BUFFER_PTR_SUFFIX);
382}
383
384Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
385    for (auto & arg : f->getArgumentList()) {
386        if (arg.getName().equals(name)) {
387            return &arg;
388        }
389    }
390    report_fatal_error(getName() + " does not have parameter " + name);
391}
392
393Value * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
394    return iBuilder->CreateCall(getDoSegmentFunction(), args);
395}
396
397Value * KernelBuilder::createGetAccumulatorCall(Value * self, const std::string & accumName) const {
398    return iBuilder->CreateCall(getAccumulatorFunction(accumName), {self});
399}
400
401BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
402    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
403}
404
405void KernelBuilder::createInstance() {
406    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
407        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
408    }
409    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
410
411    std::vector<Value *> args;
412    args.reserve(mInitialArguments.size() + mStreamSetInputBuffers.size() + mStreamSetOutputBuffers.size() + 1);
413    args.push_back(mKernelInstance);
414    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
415        Value * arg = mInitialArguments[i];
416        if (LLVM_UNLIKELY(arg == nullptr)) {
417            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
418                               + " cannot be null when calling createInstance()");
419        }
420        args.push_back(arg);
421    }
422    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
423        assert (mStreamSetInputBuffers[i]);
424        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
425        if (LLVM_UNLIKELY(arg == nullptr)) {
426            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
427                               + " was not allocated prior to calling createInstance()");
428        }
429        args.push_back(arg);
430    }
431    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
432    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
433        assert (mStreamSetOutputBuffers[i]);
434        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
435        if (LLVM_UNLIKELY(arg == nullptr)) {
436            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
437                               + " was not allocated prior to calling createInstance()");
438        }
439        args.push_back(arg);
440    }
441    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
442    iBuilder->CreateCall(getInitFunction(), args);
443}
444
445//  The default doSegment method dispatches to the doBlock routine for
446//  each block of the given number of blocksToDo, and then updates counts.
447
448void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
449
450    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
451    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
452    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
453    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
454    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
455    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
456
457    Value * baseTarget = nullptr;
458    if (useIndirectBr()) {
459        baseTarget = iBuilder->CreateSelect(doFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
460    }
461
462    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
463    Value * availablePos = producerPos[0];
464    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
465        Value * p = producerPos[i];
466        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
467    }
468
469    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
470    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
471    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
472
473    iBuilder->CreateBr(strideLoopCond);
474
475    iBuilder->SetInsertPoint(strideLoopCond);
476
477    PHINode * branchTarget = nullptr;
478    if (useIndirectBr()) {
479        branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
480        branchTarget->addIncoming(baseTarget, entryBlock);
481    }
482
483    PHINode * stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
484    stridesRemaining->addIncoming(stridesToDo, entryBlock);
485    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
486    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
487    Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
488    iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
489
490    iBuilder->SetInsertPoint(mStrideLoopBody);
491
492    if (useIndirectBr()) {
493        mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
494        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
495    }
496
497    /// GENERATE DO BLOCK METHOD
498
499    writeDoBlockMethod();
500
501    /// UPDATE PROCESSED COUNTS
502
503    processed = getProcessedItemCount(mStreamSetInputs[0].name);
504    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
505    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
506
507    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
508
509    BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
510    if (useIndirectBr()) {
511        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
512    }
513    iBuilder->CreateBr(strideLoopCond);
514
515    stridesDone->moveAfter(bodyEnd);
516
517    iBuilder->SetInsertPoint(stridesDone);
518
519    // Now conditionally perform the final block processing depending on the doFinal parameter.
520    if (useIndirectBr()) {
521        mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
522        mStrideLoopBranch->addDestination(doFinalBlock);
523        mStrideLoopBranch->addDestination(segmentDone);
524    } else {
525        iBuilder->CreateUnlikelyCondBr(doFinal, doFinalBlock, segmentDone);
526    }
527
528    doFinalBlock->moveAfter(stridesDone);
529
530    iBuilder->SetInsertPoint(doFinalBlock);
531
532    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
533    writeFinalBlockMethod(remainingItems);
534
535    itemsDone = producerPos[0];
536    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
537    setTerminationSignal();
538    iBuilder->CreateBr(segmentDone);
539
540    segmentDone->moveAfter(iBuilder->GetInsertBlock());
541
542    iBuilder->SetInsertPoint(segmentDone);
543
544    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
545    if (useIndirectBr()) {
546        MDBuilder mdb(iBuilder->getContext());
547        const auto destinations = mStrideLoopBranch->getNumDestinations();
548        uint32_t weights[destinations];
549        for (unsigned i = 0; i < destinations; ++i) {
550            weights[i] = (mStrideLoopBranch->getDestination(i) == segmentDone) ? 100 : 1;
551        }
552        ArrayRef<uint32_t> bw(weights, destinations);
553        mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
554    }
555
556}
557
558inline void BlockOrientedKernel::writeDoBlockMethod() {
559
560    Value * const self = mSelf;
561    Function * const cp = mCurrentMethod;
562    auto ip = iBuilder->saveIP();
563
564    /// Check if the do block method is called and create the function if necessary   
565    if (!useIndirectBr()) {
566        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
567        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
568        mCurrentMethod->setCallingConv(CallingConv::C);
569        mCurrentMethod->setDoesNotThrow();
570        mCurrentMethod->setDoesNotCapture(1);
571        auto args = mCurrentMethod->arg_begin();
572        mCurrentMethod = mCurrentMethod;
573        mSelf = &*args;
574        mSelf->setName("self");
575        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
576    }
577
578    std::vector<Value *> priorProduced;
579    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
580        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
581            priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
582        }
583    }
584
585    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
586
587    unsigned priorIdx = 0;
588    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
589        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
590        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
591            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
592            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
593            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
594            Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
595            Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
596            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(priorBlock);
597            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
598            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
599            iBuilder->CreateCondBr(wraparound, copyBack, done);
600            iBuilder->SetInsertPoint(copyBack);
601            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
602            cb->createCopyBack(getStreamSetBufferPtr(mStreamSetOutputs[i].name), copyItems);
603            iBuilder->CreateBr(done);
604            iBuilder->SetInsertPoint(done);
605            priorIdx++;
606        }
607        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
608            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
609            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
610            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
611            Value * accessible = cb->getLinearlyAccessibleItems(priorProduced[priorIdx]);
612            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
613            iBuilder->CreateCondBr(wraparound, copyBack, done);
614            iBuilder->SetInsertPoint(copyBack);
615            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
616            cb->createCopyBack(getStreamSetBufferPtr(mStreamSetOutputs[i].name), copyItems);
617            iBuilder->CreateBr(done);
618            iBuilder->SetInsertPoint(done);
619            priorIdx++;
620        }
621    }
622
623    /// Call the do block method if necessary then restore the current function state to the do segement method
624    if (!useIndirectBr()) {
625        iBuilder->CreateRetVoid();
626        mDoBlockMethod = mCurrentMethod;
627        iBuilder->restoreIP(ip);
628        iBuilder->CreateCall(mCurrentMethod, self);
629        mSelf = self;
630        mCurrentMethod = cp;
631    }
632
633}
634
635inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
636
637    Value * const self = mSelf;
638    Function * const cp = mCurrentMethod;
639    Value * const remainingItemCount = remainingItems;
640    auto ip = iBuilder->saveIP();
641
642    if (!useIndirectBr()) {
643        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
644        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
645        mCurrentMethod->setCallingConv(CallingConv::C);
646        mCurrentMethod->setDoesNotThrow();
647        mCurrentMethod->setDoesNotCapture(1);
648        auto args = mCurrentMethod->arg_begin();
649        mSelf = &*args;
650        mSelf->setName("self");
651        remainingItems = &*(++args);
652        remainingItems->setName("remainingItems");
653        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
654    }
655
656    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
657
658    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
659
660    if (!useIndirectBr()) {
661        iBuilder->CreateRetVoid();       
662        iBuilder->restoreIP(ip);
663        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
664        mCurrentMethod = cp;
665        mSelf = self;
666    }
667
668}
669
670//  The default finalBlock method simply dispatches to the doBlock routine.
671void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
672    CreateDoBlockMethodCall();
673}
674
675void BlockOrientedKernel::CreateDoBlockMethodCall() {
676    if (useIndirectBr()) {
677        BasicBlock * bb = CreateBasicBlock("resume");
678        mStrideLoopBranch->addDestination(bb);
679        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
680        iBuilder->CreateBr(mStrideLoopBody);
681        bb->moveAfter(iBuilder->GetInsertBlock());
682        iBuilder->SetInsertPoint(bb);
683    } else {
684        iBuilder->CreateCall(mDoBlockMethod, mSelf);
685    }
686}
687
688// CONSTRUCTOR
689
690BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
691                                                           std::string && kernelName,
692                                                           std::vector<Binding> && stream_inputs,
693                                                           std::vector<Binding> && stream_outputs,
694                                                           std::vector<Binding> && scalar_parameters,
695                                                           std::vector<Binding> && scalar_outputs,
696                                                           std::vector<Binding> && internal_scalars)
697: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
698, mDoBlockMethod(nullptr)
699, mStrideLoopBody(nullptr)
700, mStrideLoopBranch(nullptr)
701, mStrideLoopTarget(nullptr) {
702
703}
704
705
706// CONSTRUCTOR
707KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
708                             std::string && kernelName,
709                             std::vector<Binding> && stream_inputs,
710                             std::vector<Binding> && stream_outputs,
711                             std::vector<Binding> && scalar_parameters,
712                             std::vector<Binding> && scalar_outputs,
713                             std::vector<Binding> && internal_scalars)
714: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
715, mSelf(nullptr)
716, mCurrentMethod(nullptr)
717, mNoTerminateAttribute(false) {
718
719}
720
721KernelBuilder::~KernelBuilder() { }
722
723// CONSTRUCTOR
724SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
725                                             std::string && kernelName,
726                                             std::vector<Binding> && stream_inputs,
727                                             std::vector<Binding> && stream_outputs,
728                                             std::vector<Binding> && scalar_parameters,
729                                             std::vector<Binding> && scalar_outputs,
730                                             std::vector<Binding> && internal_scalars)
731: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
732
733}
Note: See TracBrowser for help on using the repository browser.