source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5391

Last change on this file since 5391 was 5391, checked in by cameron, 2 years ago

ParabixDriver/ObjectCache separate compilation and linking: initial check-in with wc

File size: 35.4 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain.h>
8#include <kernels/streamset.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Function.h>
11#include <llvm/IR/Instructions.h>
12#include <llvm/IR/MDBuilder.h>
13#include <llvm/IR/Module.h>
14#include <llvm/Support/raw_ostream.h>
15#include <llvm/Transforms/Utils/Local.h>
16
// File-local name fragments used when composing generated function names and
// kernel-state scalar field names.

// Appended to the kernel name to name the generated do-block function.
static const char * const DO_BLOCK_SUFFIX = "_DoBlock";

static const char * const FINAL_BLOCK_SUFFIX = "_FinalBlock";

// Name of the scalar field holding the logical segment number.
static const char * const LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";

// Appended to a stream set name to name its processed-item-count scalar field.
static const char * const PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";

// Appended to a stream set name to name its consumed-item-count scalar field.
static const char * const CONSUMED_ITEM_COUNT_SUFFIX = "_consumedItemCount";

// Appended to a stream set name to name its produced-item-count scalar field.
static const char * const PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";

// Name of the scalar field holding the termination flag.
static const char * const TERMINATION_SIGNAL = "terminationSignal";

// Appended to a stream set name to name the scalar field holding its buffer pointer.
static const char * const BUFFER_PTR_SUFFIX = "_bufferPtr";

static const char * const BLOCK_MASK_SUFFIX = "_blkMask";
34
35using namespace llvm;
36using namespace kernel;
37using namespace parabix;
38
39unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
40    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
41        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
42    }
43    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
44        report_fatal_error(getName() + " already contains scalar field " + name);
45    }
46    const auto index = mKernelFields.size();
47    mKernelMap.emplace(name, index);
48    mKernelFields.push_back(type);
49    return index;
50}
51
52unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
53    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
54        report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
55    }
56    const auto index = mKernelFields.size();
57    mKernelFields.push_back(type);
58    return index;
59}
60
61void KernelBuilder::prepareKernelSignature() {
62    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
63        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, i);
64    }
65    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
66        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, i);
67    }
68}
69   
70void KernelBuilder::prepareKernel() {
71    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
72        report_fatal_error("Cannot prepare kernel after kernel state finalized");
73    }
74    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
75        std::string tmp;
76        raw_string_ostream out(tmp);
77        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
78            << mStreamSetInputs.size() << " input stream sets.";
79        report_fatal_error(out.str());
80    }
81    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
82        std::string tmp;
83        raw_string_ostream out(tmp);
84        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
85            << mStreamSetOutputs.size() << " output stream sets.";
86        report_fatal_error(out.str());
87    }
88    const auto blockSize = iBuilder->getBitBlockWidth();
89    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
90        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
91            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
92        }
93        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
94        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
95            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
96        }
97       
98    }
99    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
100        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
101        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
102            addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
103        }
104    }
105    for (const auto binding : mScalarInputs) {
106        addScalar(binding.type, binding.name);
107    }
108    for (const auto binding : mScalarOutputs) {
109        addScalar(binding.type, binding.name);
110    }
111    if (mStreamSetNameMap.empty()) {
112        prepareKernelSignature();
113    }
114    for (auto binding : mInternalScalars) {
115        addScalar(binding.type, binding.name);
116    }
117    addScalar(iBuilder->getSizeTy(), LOGICAL_SEGMENT_NO_SCALAR);
118    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
119    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
120}
121
122std::unique_ptr<Module> KernelBuilder::createKernelStub() {
123    return make_unique<Module>(getName() + "_" + iBuilder->getBuilderUniqueName(), iBuilder->getContext());
124}
125
126void KernelBuilder::setCallParameters(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
127    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
128    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
129        if (LLVM_UNLIKELY(mStreamSetInputBuffers[i] == nullptr)) {
130            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
131                               + " cannot be null when calling generateKernel()");
132        }
133    }
134    if (LLVM_UNLIKELY(mStreamSetInputs.size() != mStreamSetInputBuffers.size())) {
135        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
136                           " input stream sets but generateKernel() was given "
137                           + std::to_string(mStreamSetInputBuffers.size()));
138    }
139   
140    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
141    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
142        if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i] == nullptr)) {
143            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
144                               + " cannot be null when calling generateKernel()");
145        }
146    }
147    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != mStreamSetOutputBuffers.size())) {
148        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
149                           + " output stream sets but generateKernel() was given "
150                           + std::to_string(mStreamSetOutputBuffers.size()));
151    }
152    prepareKernel(); // possibly overridden by the KernelBuilder subtype
153   
154}   
155
156std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
157    auto saveModule = iBuilder->getModule();
158    auto savePoint = iBuilder->saveIP();
159    auto module = createKernelStub();
160    iBuilder->setModule(module.get());
161    generateKernel(inputs, outputs);
162    iBuilder->setModule(saveModule);
163    iBuilder->restoreIP(savePoint);
164    return module;
165}
166
167void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
168    setCallParameters(inputs, outputs);
169    generateKernel();
170}
171
172void KernelBuilder::generateKernel() {
173    auto savePoint = iBuilder->saveIP();
174    addKernelDeclarations(iBuilder->getModule());
175    callGenerateInitMethod();
176    callGenerateDoSegmentMethod();
177    // Implement the accumulator get functions
178    for (auto binding : mScalarOutputs) {
179        Function * f = getAccumulatorFunction(binding.name);
180        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
181        Value * self = &*(f->arg_begin());
182        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
183        Value * retVal = iBuilder->CreateLoad(ptr);
184        iBuilder->CreateRet(retVal);
185    }
186    iBuilder->restoreIP(savePoint);
187}
188
189void KernelBuilder::callGenerateDoSegmentMethod() {
190    mCurrentMethod = getDoSegmentFunction();
191    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
192    auto args = mCurrentMethod->arg_begin();
193    mSelf = &*(args++);
194    Value * doFinal = &*(args++);
195    std::vector<Value *> producerPos;
196    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
197        producerPos.push_back(&*(args++));
198    }
199    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
200    iBuilder->CreateRetVoid();
201}
202
203void KernelBuilder::callGenerateInitMethod() {
204    mCurrentMethod = getInitFunction();
205    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
206    Function::arg_iterator args = mCurrentMethod->arg_begin();
207    mSelf = &*(args++);
208    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), mSelf);
209    for (auto binding : mScalarInputs) {
210        Value * param = &*(args++);
211        Value * ptr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
212        iBuilder->CreateStore(param, ptr);
213    }
214    generateInitMethod();
215    iBuilder->CreateRetVoid();
216}
217
218ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
219    const auto f = mKernelMap.find(name);
220    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
221        report_fatal_error(getName() + " does not contain scalar: " + name);
222    }
223    return iBuilder->getInt32(f->second);
224}
225
226unsigned KernelBuilder::getScalarCount() const {
227    return mKernelFields.size();
228}
229
230Value * KernelBuilder::getScalarFieldPtr(Value * instance, Value * index) const {
231    assert ("instance cannot be null!" && instance);
232    return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), index});
233}
234
235Value * KernelBuilder::getScalarFieldPtr(Value * instance, const std::string & fieldName) const {
236    assert ("instance cannot be null!" && instance);
237    return getScalarFieldPtr(instance, getScalarIndex(fieldName));
238}
239
240Value * KernelBuilder::getScalarField(Value * instance, const std::string & fieldName) const {
241    return iBuilder->CreateLoad(getScalarFieldPtr(instance, fieldName));
242}
243
244Value * KernelBuilder::getScalarField(Value * instance, Value * index) const {
245    assert ("instance cannot be null!" && instance);
246    return iBuilder->CreateLoad(getScalarFieldPtr(instance, index));
247}
248
249void KernelBuilder::setScalarField(Value * instance, const std::string & fieldName, Value * value) const {
250    assert ("instance cannot be null!" && instance);
251    iBuilder->CreateStore(value, getScalarFieldPtr(instance, fieldName));
252}
253
254void KernelBuilder::setScalarField(Value * instance, Value * index, Value * value) const {
255    assert ("instance cannot be null!" && instance);
256    iBuilder->CreateStore(value, getScalarFieldPtr(instance, index));
257}
258
259Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name, Value * doFinal) const {
260    assert ("instance cannot be null!" && instance);
261    unsigned ssIdx = getStreamSetIndex(name);
262    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
263        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
264        std::string principalField;
265        if (refSet.empty()) {
266            if (mStreamSetInputs.empty()) {
267                principalField = mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX;
268            } else {
269                principalField = mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
270            }
271        } else {
272            unsigned pfIndex = getStreamSetIndex(refSet);
273            if (mStreamSetInputs.size() > pfIndex && mStreamSetInputs[pfIndex].name == refSet) {
274               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
275            } else {
276               principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
277            }
278        }
279        Value * principalItemsProcessed = getScalarField(instance, principalField);
280        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
281    }
282    return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
283}
284
285llvm::Value * KernelBuilder::getConsumedItemCount(llvm::Value * instance, const std::string & name) const {
286    assert ("instance cannot be null!" && instance);
287    return getScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX);
288}
289
290Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
291    assert ("instance cannot be null!" && instance);
292    unsigned ssIdx = getStreamSetIndex(name);
293    if (mStreamSetInputs[ssIdx].rate.isExact()) {
294        std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
295        if (refSet.empty()) {
296            refSet = mStreamSetInputs[0].name;
297        }
298        Value * principalItemsProcessed = getScalarField(instance, refSet + PROCESSED_ITEM_COUNT_SUFFIX);
299        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
300    }
301    return getScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX);
302}
303
304void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
305    assert ("instance cannot be null!" && instance);
306    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
307}
308
309void KernelBuilder::setConsumedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
310    assert ("instance cannot be null!" && instance);
311    setScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX, value);
312}
313
314void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
315    assert ("instance cannot be null!" && instance);
316    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
317}
318
319void KernelBuilder::reserveBytes(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
320    assert ("instance cannot be null!" && instance);
321    Value * itemCount = getProducedItemCount(instance, name);
322    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
323    buf->reserveBytes(getStreamSetBufferPtr(name), iBuilder->CreateAdd(itemCount, value));
324}
325
326Value * KernelBuilder::getTerminationSignal(Value * instance) const {
327    assert ("instance cannot be null!" && instance);
328    return getScalarField(instance, TERMINATION_SIGNAL);
329}
330
331void KernelBuilder::setTerminationSignal(Value * instance) const {
332    assert ("instance cannot be null!" && instance);
333    setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getInt1(true));
334}
335
336LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * instance) const {
337    assert ("instance cannot be null!" && instance);
338    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
339}
340
341void KernelBuilder::releaseLogicalSegmentNo(Value * instance, Value * newCount) const {
342    assert ("instance cannot be null!" && instance);
343    iBuilder->CreateAtomicStoreRelease(newCount, getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
344}
345
346inline Value * KernelBuilder::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
347    for (const Binding & b : bindings) {
348        if (b.name == name) {
349            const auto divisor = iBuilder->getBitBlockWidth();
350            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
351                return iBuilder->CreateLShr(itemCount, std::log2(divisor));
352            } else {
353                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
354            }
355        }
356    }
357    report_fatal_error("Error: no binding in " + getName() + " for " + name);
358}
359
360Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
361    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
362    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
363    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
364}
365
366Value * KernelBuilder::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
367    return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
368}
369
370Value * KernelBuilder::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
371    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
372    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
373    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
374}
375
376Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
377    return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
378}
379
380llvm::Value * KernelBuilder::getInputStreamSetCount(const std::string & name) const {
381    return getInputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
382}
383
384llvm::Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
385    Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
386    blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
387    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
388    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
389}
390
391Value * KernelBuilder::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
392    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
393    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
394    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
395}
396
397void KernelBuilder::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
398    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
399}
400
401Value * KernelBuilder::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
402    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
403    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
404    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
405}
406
407void KernelBuilder::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
408    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
409}
410
411llvm::Value * KernelBuilder::getOutputStreamSetCount(const std::string & name) const {
412    return getOutputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
413}
414
415Value * KernelBuilder::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
416    return getInputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
417}
418
419Value * KernelBuilder::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
420    return getOutputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
421}
422
423unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
424    const auto f = mStreamSetNameMap.find(name);
425    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
426        report_fatal_error(getName() + " does not contain stream set: " + name);
427    }
428    return f->second;
429}
430
431Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
432    return getScalarField(getSelf(), name + BUFFER_PTR_SUFFIX);
433}
434
435Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
436    for (auto & arg : f->getArgumentList()) {
437        if (arg.getName().equals(name)) {
438            return &arg;
439        }
440    }
441    report_fatal_error(getName() + " does not have parameter " + name);
442}
443
444Value * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
445    return iBuilder->CreateCall(getDoSegmentFunction(), args);
446}
447
448Value * KernelBuilder::createGetAccumulatorCall(Value * self, const std::string & accumName) const {
449    return iBuilder->CreateCall(getAccumulatorFunction(accumName), {self});
450}
451
452BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
453    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
454}
455
456void KernelBuilder::createInstance() {
457    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
458        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
459    }
460    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
461
462    std::vector<Value *> args;
463    args.reserve(mInitialArguments.size() + mStreamSetInputBuffers.size() + mStreamSetOutputBuffers.size() + 1);
464    args.push_back(mKernelInstance);
465    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
466        Value * arg = mInitialArguments[i];
467        if (LLVM_UNLIKELY(arg == nullptr)) {
468            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
469                               + " cannot be null when calling createInstance()");
470        }
471        args.push_back(arg);
472    }
473    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
474        assert (mStreamSetInputBuffers[i]);
475        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
476        if (LLVM_UNLIKELY(arg == nullptr)) {
477            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
478                               + " was not allocated prior to calling createInstance()");
479        }
480        args.push_back(arg);
481    }
482    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
483    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
484        assert (mStreamSetOutputBuffers[i]);
485        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
486        if (LLVM_UNLIKELY(arg == nullptr)) {
487            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
488                               + " was not allocated prior to calling createInstance()");
489        }
490        args.push_back(arg);
491    }
492    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
493    iBuilder->CreateCall(getInitFunction(), args);
494}
495
496//  The default doSegment method dispatches to the doBlock routine for
497//  each block of the given number of blocksToDo, and then updates counts.
498
499void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
500
501    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
502    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
503    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
504    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
505    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
506    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
507
508    Value * baseTarget = nullptr;
509    if (useIndirectBr()) {
510        baseTarget = iBuilder->CreateSelect(doFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
511    }
512
513    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
514    Value * availablePos = producerPos[0];
515    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
516        Value * p = producerPos[i];
517        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
518    }
519
520    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
521    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
522    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
523
524    iBuilder->CreateBr(strideLoopCond);
525
526    iBuilder->SetInsertPoint(strideLoopCond);
527
528    PHINode * branchTarget = nullptr;
529    if (useIndirectBr()) {
530        branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
531        branchTarget->addIncoming(baseTarget, entryBlock);
532    }
533
534    PHINode * stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
535    stridesRemaining->addIncoming(stridesToDo, entryBlock);
536    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
537    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
538    Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
539    iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
540
541    iBuilder->SetInsertPoint(mStrideLoopBody);
542
543    if (useIndirectBr()) {
544        mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
545        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
546    }
547
548    /// GENERATE DO BLOCK METHOD
549
550    writeDoBlockMethod();
551
552    /// UPDATE PROCESSED COUNTS
553
554    processed = getProcessedItemCount(mStreamSetInputs[0].name);
555    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
556    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
557
558    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
559
560    BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
561    if (useIndirectBr()) {
562        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
563    }
564    iBuilder->CreateBr(strideLoopCond);
565
566    stridesDone->moveAfter(bodyEnd);
567
568    iBuilder->SetInsertPoint(stridesDone);
569
570    // Now conditionally perform the final block processing depending on the doFinal parameter.
571    if (useIndirectBr()) {
572        mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
573        mStrideLoopBranch->addDestination(doFinalBlock);
574        mStrideLoopBranch->addDestination(segmentDone);
575    } else {
576        iBuilder->CreateUnlikelyCondBr(doFinal, doFinalBlock, segmentDone);
577    }
578
579    doFinalBlock->moveAfter(stridesDone);
580
581    iBuilder->SetInsertPoint(doFinalBlock);
582
583    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
584    writeFinalBlockMethod(remainingItems);
585
586    itemsDone = producerPos[0];
587    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
588    setTerminationSignal();
589    iBuilder->CreateBr(segmentDone);
590
591    segmentDone->moveAfter(iBuilder->GetInsertBlock());
592
593    iBuilder->SetInsertPoint(segmentDone);
594
595    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
596    if (useIndirectBr()) {
597        MDBuilder mdb(iBuilder->getContext());
598        const auto destinations = mStrideLoopBranch->getNumDestinations();
599        uint32_t weights[destinations];
600        for (unsigned i = 0; i < destinations; ++i) {
601            weights[i] = (mStrideLoopBranch->getDestination(i) == segmentDone) ? 100 : 1;
602        }
603        ArrayRef<uint32_t> bw(weights, destinations);
604        mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
605    }
606
607}
608
609inline void BlockOrientedKernel::writeDoBlockMethod() {
610
611    Value * const self = mSelf;
612    Function * const cp = mCurrentMethod;
613    auto ip = iBuilder->saveIP();
614
615    /// Check if the do block method is called and create the function if necessary   
616    if (!useIndirectBr()) {
617        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
618        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
619        mCurrentMethod->setCallingConv(CallingConv::C);
620        mCurrentMethod->setDoesNotThrow();
621        mCurrentMethod->setDoesNotCapture(1);
622        auto args = mCurrentMethod->arg_begin();
623        mCurrentMethod = mCurrentMethod;
624        mSelf = &*args;
625        mSelf->setName("self");
626        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
627    }
628
629    std::vector<Value *> priorProduced;
630    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
631        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
632            priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
633        }
634    }
635
636    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
637
638    unsigned priorIdx = 0;
639    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
640        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
641        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
642            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
643            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
644            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
645            Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
646            Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
647            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
648            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(instance, priorBlock);
649            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
650            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
651            iBuilder->CreateCondBr(wraparound, copyBack, done);
652            iBuilder->SetInsertPoint(copyBack);
653            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
654            cb->createCopyBack(instance, copyItems);
655            iBuilder->CreateBr(done);
656            iBuilder->SetInsertPoint(done);
657            priorIdx++;
658        }
659        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
660            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
661            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
662            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
663            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
664            Value * accessible = cb->getLinearlyAccessibleItems(instance, priorProduced[priorIdx]);
665            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
666            iBuilder->CreateCondBr(wraparound, copyBack, done);
667            iBuilder->SetInsertPoint(copyBack);
668            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
669            cb->createCopyBack(instance, copyItems);
670            iBuilder->CreateBr(done);
671            iBuilder->SetInsertPoint(done);
672            priorIdx++;
673        }
674    }
675
676    /// Call the do block method if necessary then restore the current function state to the do segment method
677    if (!useIndirectBr()) {
678        iBuilder->CreateRetVoid();
679        mDoBlockMethod = mCurrentMethod;
680        iBuilder->restoreIP(ip);
681        iBuilder->CreateCall(mCurrentMethod, self);
682        mSelf = self;
683        mCurrentMethod = cp;
684    }
685
686}
687
688inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
689
690    Value * const self = mSelf;
691    Function * const cp = mCurrentMethod;
692    Value * const remainingItemCount = remainingItems;
693    auto ip = iBuilder->saveIP();
694
695    if (!useIndirectBr()) {
696        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
697        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
698        mCurrentMethod->setCallingConv(CallingConv::C);
699        mCurrentMethod->setDoesNotThrow();
700        mCurrentMethod->setDoesNotCapture(1);
701        auto args = mCurrentMethod->arg_begin();
702        mSelf = &*args;
703        mSelf->setName("self");
704        remainingItems = &*(++args);
705        remainingItems->setName("remainingItems");
706        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
707    }
708
709    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
710
711    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
712
713    if (!useIndirectBr()) {
714        iBuilder->CreateRetVoid();       
715        iBuilder->restoreIP(ip);
716        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
717        mCurrentMethod = cp;
718        mSelf = self;
719    }
720
721}
722
723//  The default finalBlock method simply dispatches to the doBlock routine.
724void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
725    CreateDoBlockMethodCall();
726}
727
728void BlockOrientedKernel::CreateDoBlockMethodCall() {
729    if (useIndirectBr()) {
730        BasicBlock * bb = CreateBasicBlock("resume");
731        mStrideLoopBranch->addDestination(bb);
732        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
733        iBuilder->CreateBr(mStrideLoopBody);
734        bb->moveAfter(iBuilder->GetInsertBlock());
735        iBuilder->SetInsertPoint(bb);
736    } else {
737        iBuilder->CreateCall(mDoBlockMethod, mSelf);
738    }
739}
740
741// CONSTRUCTOR
742
743BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
744                                                           std::string && kernelName,
745                                                           std::vector<Binding> && stream_inputs,
746                                                           std::vector<Binding> && stream_outputs,
747                                                           std::vector<Binding> && scalar_parameters,
748                                                           std::vector<Binding> && scalar_outputs,
749                                                           std::vector<Binding> && internal_scalars)
750: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
751, mDoBlockMethod(nullptr)
752, mStrideLoopBody(nullptr)
753, mStrideLoopBranch(nullptr)
754, mStrideLoopTarget(nullptr) {
755
756}
757
758
759// CONSTRUCTOR
760KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
761                             std::string && kernelName,
762                             std::vector<Binding> && stream_inputs,
763                             std::vector<Binding> && stream_outputs,
764                             std::vector<Binding> && scalar_parameters,
765                             std::vector<Binding> && scalar_outputs,
766                             std::vector<Binding> && internal_scalars)
767: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
768, mSelf(nullptr)
769, mCurrentMethod(nullptr)
770, mNoTerminateAttribute(false) {
771
772}
773
774KernelBuilder::~KernelBuilder() { }
775
776// CONSTRUCTOR
777SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
778                                             std::string && kernelName,
779                                             std::vector<Binding> && stream_inputs,
780                                             std::vector<Binding> && stream_outputs,
781                                             std::vector<Binding> && scalar_parameters,
782                                             std::vector<Binding> && scalar_outputs,
783                                             std::vector<Binding> && internal_scalars)
784: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
785
786}
Note: See TracBrowser for help on using the repository browser.