source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5392

Last change on this file since 5392 was 5392, checked in by cameron, 2 years ago

Kernel signatures for object cache

File size: 35.7 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain.h>
8#include <kernels/streamset.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Function.h>
11#include <llvm/IR/Instructions.h>
12#include <llvm/IR/MDBuilder.h>
13#include <llvm/IR/Module.h>
14#include <llvm/Support/raw_ostream.h>
15#include <llvm/Bitcode/ReaderWriter.h>
16#include <llvm/Transforms/Utils/Local.h>
17
// String constants appended to (or used as) names of generated functions and
// kernel-state scalar fields throughout this file.

// Suffixes for generated function names.
static const auto DO_BLOCK_SUFFIX = "_DoBlock";
static const auto FINAL_BLOCK_SUFFIX = "_FinalBlock";

// Fixed names of scalar fields added to every kernel state by prepareKernel().
static const auto LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";
static const auto TERMINATION_SIGNAL = "terminationSignal";

// Suffixes appended to a stream set name to form its item-count scalar names.
static const auto PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";
static const auto CONSUMED_ITEM_COUNT_SUFFIX = "_consumedItemCount";
static const auto PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";

// Suffix appended to a stream set name to form its buffer-pointer scalar name.
static const auto BUFFER_PTR_SUFFIX = "_bufferPtr";

// NOTE(review): not referenced in the visible part of this file.
static const auto BLOCK_MASK_SUFFIX = "_blkMask";
35
36using namespace llvm;
37using namespace kernel;
38using namespace parabix;
39
40unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
41    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
42        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
43    }
44    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
45        report_fatal_error(getName() + " already contains scalar field " + name);
46    }
47    const auto index = mKernelFields.size();
48    mKernelMap.emplace(name, index);
49    mKernelFields.push_back(type);
50    return index;
51}
52
53unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
54    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
55        report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
56    }
57    const auto index = mKernelFields.size();
58    mKernelFields.push_back(type);
59    return index;
60}
61
62void KernelBuilder::prepareStreamSetNameMap() {
63    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
64        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, i);
65    }
66    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
67        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, i);
68    }
69}
70   
71void KernelBuilder::prepareKernel() {
72    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
73        report_fatal_error("Cannot prepare kernel after kernel state finalized");
74    }
75    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
76        std::string tmp;
77        raw_string_ostream out(tmp);
78        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
79            << mStreamSetInputs.size() << " input stream sets.";
80        report_fatal_error(out.str());
81    }
82    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
83        std::string tmp;
84        raw_string_ostream out(tmp);
85        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
86            << mStreamSetOutputs.size() << " output stream sets.";
87        report_fatal_error(out.str());
88    }
89    const auto blockSize = iBuilder->getBitBlockWidth();
90    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
91        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
92            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
93        }
94        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
95        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
96            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
97        }
98       
99    }
100    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
101        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
102        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
103            addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
104        }
105    }
106    for (const auto binding : mScalarInputs) {
107        addScalar(binding.type, binding.name);
108    }
109    for (const auto binding : mScalarOutputs) {
110        addScalar(binding.type, binding.name);
111    }
112    if (mStreamSetNameMap.empty()) {
113        prepareStreamSetNameMap();
114    }
115    for (auto binding : mInternalScalars) {
116        addScalar(binding.type, binding.name);
117    }
118    addScalar(iBuilder->getSizeTy(), LOGICAL_SEGMENT_NO_SCALAR);
119    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
120    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
121}
122
123std::unique_ptr<Module> KernelBuilder::createKernelStub() {
124    return make_unique<Module>(getName() + "_" + iBuilder->getBuilderUniqueName(), iBuilder->getContext());
125}
126
127void KernelBuilder::setCallParameters(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
128    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
129    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
130        if (LLVM_UNLIKELY(mStreamSetInputBuffers[i] == nullptr)) {
131            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
132                               + " cannot be null when calling generateKernel()");
133        }
134    }
135    if (LLVM_UNLIKELY(mStreamSetInputs.size() != mStreamSetInputBuffers.size())) {
136        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
137                           " input stream sets but generateKernel() was given "
138                           + std::to_string(mStreamSetInputBuffers.size()));
139    }
140   
141    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
142    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
143        if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i] == nullptr)) {
144            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
145                               + " cannot be null when calling generateKernel()");
146        }
147    }
148    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != mStreamSetOutputBuffers.size())) {
149        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
150                           + " output stream sets but generateKernel() was given "
151                           + std::to_string(mStreamSetOutputBuffers.size()));
152    }
153    prepareKernel(); // possibly overridden by the KernelBuilder subtype
154   
155}   
156
157
158// Default kernel signature: generate the IR and emit as byte code.
159void KernelBuilder::generateKernelSignature(std::string &signature) {
160    generateKernel();
161    raw_string_ostream OS(signature);
162    WriteBitcodeToFile(iBuilder->getModule(), OS);
163}
164
165
166std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
167    auto saveModule = iBuilder->getModule();
168    auto savePoint = iBuilder->saveIP();
169    auto module = createKernelStub();
170    iBuilder->setModule(module.get());
171    generateKernel(inputs, outputs);
172    iBuilder->setModule(saveModule);
173    iBuilder->restoreIP(savePoint);
174    return module;
175}
176
177void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
178    setCallParameters(inputs, outputs);
179    generateKernel();
180}
181
182void KernelBuilder::generateKernel() {
183    auto savePoint = iBuilder->saveIP();
184    addKernelDeclarations(iBuilder->getModule());
185    callGenerateInitMethod();
186    callGenerateDoSegmentMethod();
187    // Implement the accumulator get functions
188    for (auto binding : mScalarOutputs) {
189        Function * f = getAccumulatorFunction(binding.name);
190        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
191        Value * self = &*(f->arg_begin());
192        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
193        Value * retVal = iBuilder->CreateLoad(ptr);
194        iBuilder->CreateRet(retVal);
195    }
196    iBuilder->restoreIP(savePoint);
197}
198
199void KernelBuilder::callGenerateDoSegmentMethod() {
200    mCurrentMethod = getDoSegmentFunction();
201    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
202    auto args = mCurrentMethod->arg_begin();
203    mSelf = &*(args++);
204    Value * doFinal = &*(args++);
205    std::vector<Value *> producerPos;
206    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
207        producerPos.push_back(&*(args++));
208    }
209    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
210    iBuilder->CreateRetVoid();
211}
212
213void KernelBuilder::callGenerateInitMethod() {
214    mCurrentMethod = getInitFunction();
215    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
216    Function::arg_iterator args = mCurrentMethod->arg_begin();
217    mSelf = &*(args++);
218    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), mSelf);
219    for (auto binding : mScalarInputs) {
220        Value * param = &*(args++);
221        Value * ptr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
222        iBuilder->CreateStore(param, ptr);
223    }
224    generateInitMethod();
225    iBuilder->CreateRetVoid();
226}
227
228ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
229    const auto f = mKernelMap.find(name);
230    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
231        report_fatal_error(getName() + " does not contain scalar: " + name);
232    }
233    return iBuilder->getInt32(f->second);
234}
235
236unsigned KernelBuilder::getScalarCount() const {
237    return mKernelFields.size();
238}
239
240Value * KernelBuilder::getScalarFieldPtr(Value * instance, Value * index) const {
241    assert ("instance cannot be null!" && instance);
242    return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), index});
243}
244
245Value * KernelBuilder::getScalarFieldPtr(Value * instance, const std::string & fieldName) const {
246    assert ("instance cannot be null!" && instance);
247    return getScalarFieldPtr(instance, getScalarIndex(fieldName));
248}
249
250Value * KernelBuilder::getScalarField(Value * instance, const std::string & fieldName) const {
251    return iBuilder->CreateLoad(getScalarFieldPtr(instance, fieldName));
252}
253
254Value * KernelBuilder::getScalarField(Value * instance, Value * index) const {
255    assert ("instance cannot be null!" && instance);
256    return iBuilder->CreateLoad(getScalarFieldPtr(instance, index));
257}
258
259void KernelBuilder::setScalarField(Value * instance, const std::string & fieldName, Value * value) const {
260    assert ("instance cannot be null!" && instance);
261    iBuilder->CreateStore(value, getScalarFieldPtr(instance, fieldName));
262}
263
264void KernelBuilder::setScalarField(Value * instance, Value * index, Value * value) const {
265    assert ("instance cannot be null!" && instance);
266    iBuilder->CreateStore(value, getScalarFieldPtr(instance, index));
267}
268
269Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name, Value * doFinal) const {
270    assert ("instance cannot be null!" && instance);
271    unsigned ssIdx = getStreamSetIndex(name);
272    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
273        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
274        std::string principalField;
275        if (refSet.empty()) {
276            if (mStreamSetInputs.empty()) {
277                principalField = mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX;
278            } else {
279                principalField = mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
280            }
281        } else {
282            unsigned pfIndex = getStreamSetIndex(refSet);
283            if (mStreamSetInputs.size() > pfIndex && mStreamSetInputs[pfIndex].name == refSet) {
284               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
285            } else {
286               principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
287            }
288        }
289        Value * principalItemsProcessed = getScalarField(instance, principalField);
290        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
291    }
292    return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
293}
294
295llvm::Value * KernelBuilder::getConsumedItemCount(llvm::Value * instance, const std::string & name) const {
296    assert ("instance cannot be null!" && instance);
297    return getScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX);
298}
299
300Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
301    assert ("instance cannot be null!" && instance);
302    unsigned ssIdx = getStreamSetIndex(name);
303    if (mStreamSetInputs[ssIdx].rate.isExact()) {
304        std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
305        if (refSet.empty()) {
306            refSet = mStreamSetInputs[0].name;
307        }
308        Value * principalItemsProcessed = getScalarField(instance, refSet + PROCESSED_ITEM_COUNT_SUFFIX);
309        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
310    }
311    return getScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX);
312}
313
314void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
315    assert ("instance cannot be null!" && instance);
316    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
317}
318
319void KernelBuilder::setConsumedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
320    assert ("instance cannot be null!" && instance);
321    setScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX, value);
322}
323
324void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
325    assert ("instance cannot be null!" && instance);
326    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
327}
328
329void KernelBuilder::reserveBytes(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
330    assert ("instance cannot be null!" && instance);
331    Value * itemCount = getProducedItemCount(instance, name);
332    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
333    buf->reserveBytes(getStreamSetBufferPtr(name), iBuilder->CreateAdd(itemCount, value));
334}
335
336Value * KernelBuilder::getTerminationSignal(Value * instance) const {
337    assert ("instance cannot be null!" && instance);
338    return getScalarField(instance, TERMINATION_SIGNAL);
339}
340
341void KernelBuilder::setTerminationSignal(Value * instance) const {
342    assert ("instance cannot be null!" && instance);
343    setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getInt1(true));
344}
345
346LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * instance) const {
347    assert ("instance cannot be null!" && instance);
348    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
349}
350
351void KernelBuilder::releaseLogicalSegmentNo(Value * instance, Value * newCount) const {
352    assert ("instance cannot be null!" && instance);
353    iBuilder->CreateAtomicStoreRelease(newCount, getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
354}
355
356inline Value * KernelBuilder::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
357    for (const Binding & b : bindings) {
358        if (b.name == name) {
359            const auto divisor = iBuilder->getBitBlockWidth();
360            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
361                return iBuilder->CreateLShr(itemCount, std::log2(divisor));
362            } else {
363                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
364            }
365        }
366    }
367    report_fatal_error("Error: no binding in " + getName() + " for " + name);
368}
369
370Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
371    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
372    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
373    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
374}
375
376Value * KernelBuilder::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
377    return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
378}
379
380Value * KernelBuilder::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
381    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
382    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
383    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
384}
385
386Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
387    return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
388}
389
390llvm::Value * KernelBuilder::getInputStreamSetCount(const std::string & name) const {
391    return getInputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
392}
393
394llvm::Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
395    Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
396    blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
397    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
398    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
399}
400
401Value * KernelBuilder::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
402    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
403    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
404    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
405}
406
407void KernelBuilder::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
408    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
409}
410
411Value * KernelBuilder::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
412    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
413    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
414    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
415}
416
417void KernelBuilder::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
418    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
419}
420
421llvm::Value * KernelBuilder::getOutputStreamSetCount(const std::string & name) const {
422    return getOutputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
423}
424
425Value * KernelBuilder::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
426    return getInputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
427}
428
429Value * KernelBuilder::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
430    return getOutputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
431}
432
433unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
434    const auto f = mStreamSetNameMap.find(name);
435    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
436        report_fatal_error(getName() + " does not contain stream set: " + name);
437    }
438    return f->second;
439}
440
441Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
442    return getScalarField(getSelf(), name + BUFFER_PTR_SUFFIX);
443}
444
445Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
446    for (auto & arg : f->getArgumentList()) {
447        if (arg.getName().equals(name)) {
448            return &arg;
449        }
450    }
451    report_fatal_error(getName() + " does not have parameter " + name);
452}
453
454Value * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
455    return iBuilder->CreateCall(getDoSegmentFunction(), args);
456}
457
458Value * KernelBuilder::createGetAccumulatorCall(Value * self, const std::string & accumName) const {
459    return iBuilder->CreateCall(getAccumulatorFunction(accumName), {self});
460}
461
462BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
463    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
464}
465
466void KernelBuilder::createInstance() {
467    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
468        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
469    }
470    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
471
472    std::vector<Value *> args;
473    args.reserve(mInitialArguments.size() + mStreamSetInputBuffers.size() + mStreamSetOutputBuffers.size() + 1);
474    args.push_back(mKernelInstance);
475    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
476        Value * arg = mInitialArguments[i];
477        if (LLVM_UNLIKELY(arg == nullptr)) {
478            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
479                               + " cannot be null when calling createInstance()");
480        }
481        args.push_back(arg);
482    }
483    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
484        assert (mStreamSetInputBuffers[i]);
485        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
486        if (LLVM_UNLIKELY(arg == nullptr)) {
487            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
488                               + " was not allocated prior to calling createInstance()");
489        }
490        args.push_back(arg);
491    }
492    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
493    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
494        assert (mStreamSetOutputBuffers[i]);
495        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
496        if (LLVM_UNLIKELY(arg == nullptr)) {
497            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
498                               + " was not allocated prior to calling createInstance()");
499        }
500        args.push_back(arg);
501    }
502    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
503    iBuilder->CreateCall(getInitFunction(), args);
504}
505
506//  The default doSegment method dispatches to the doBlock routine for
507//  each block of the given number of blocksToDo, and then updates counts.
508
509void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
510
511    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
512    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
513    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
514    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
515    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
516    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
517
518    Value * baseTarget = nullptr;
519    if (useIndirectBr()) {
520        baseTarget = iBuilder->CreateSelect(doFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
521    }
522
523    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
524    Value * availablePos = producerPos[0];
525    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
526        Value * p = producerPos[i];
527        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
528    }
529
530    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
531    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
532    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
533
534    iBuilder->CreateBr(strideLoopCond);
535
536    iBuilder->SetInsertPoint(strideLoopCond);
537
538    PHINode * branchTarget = nullptr;
539    if (useIndirectBr()) {
540        branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
541        branchTarget->addIncoming(baseTarget, entryBlock);
542    }
543
544    PHINode * stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
545    stridesRemaining->addIncoming(stridesToDo, entryBlock);
546    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
547    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
548    Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
549    iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
550
551    iBuilder->SetInsertPoint(mStrideLoopBody);
552
553    if (useIndirectBr()) {
554        mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
555        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
556    }
557
558    /// GENERATE DO BLOCK METHOD
559
560    writeDoBlockMethod();
561
562    /// UPDATE PROCESSED COUNTS
563
564    processed = getProcessedItemCount(mStreamSetInputs[0].name);
565    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
566    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
567
568    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
569
570    BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
571    if (useIndirectBr()) {
572        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
573    }
574    iBuilder->CreateBr(strideLoopCond);
575
576    stridesDone->moveAfter(bodyEnd);
577
578    iBuilder->SetInsertPoint(stridesDone);
579
580    // Now conditionally perform the final block processing depending on the doFinal parameter.
581    if (useIndirectBr()) {
582        mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
583        mStrideLoopBranch->addDestination(doFinalBlock);
584        mStrideLoopBranch->addDestination(segmentDone);
585    } else {
586        iBuilder->CreateUnlikelyCondBr(doFinal, doFinalBlock, segmentDone);
587    }
588
589    doFinalBlock->moveAfter(stridesDone);
590
591    iBuilder->SetInsertPoint(doFinalBlock);
592
593    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
594    writeFinalBlockMethod(remainingItems);
595
596    itemsDone = producerPos[0];
597    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
598    setTerminationSignal();
599    iBuilder->CreateBr(segmentDone);
600
601    segmentDone->moveAfter(iBuilder->GetInsertBlock());
602
603    iBuilder->SetInsertPoint(segmentDone);
604
605    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
606    if (useIndirectBr()) {
607        MDBuilder mdb(iBuilder->getContext());
608        const auto destinations = mStrideLoopBranch->getNumDestinations();
609        uint32_t weights[destinations];
610        for (unsigned i = 0; i < destinations; ++i) {
611            weights[i] = (mStrideLoopBranch->getDestination(i) == segmentDone) ? 100 : 1;
612        }
613        ArrayRef<uint32_t> bw(weights, destinations);
614        mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
615    }
616
617}
618
619inline void BlockOrientedKernel::writeDoBlockMethod() {
620
621    Value * const self = mSelf;
622    Function * const cp = mCurrentMethod;
623    auto ip = iBuilder->saveIP();
624
625    /// Check if the do block method is called and create the function if necessary   
626    if (!useIndirectBr()) {
627        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
628        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
629        mCurrentMethod->setCallingConv(CallingConv::C);
630        mCurrentMethod->setDoesNotThrow();
631        mCurrentMethod->setDoesNotCapture(1);
632        auto args = mCurrentMethod->arg_begin();
633        mCurrentMethod = mCurrentMethod;
634        mSelf = &*args;
635        mSelf->setName("self");
636        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
637    }
638
639    std::vector<Value *> priorProduced;
640    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
641        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
642            priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
643        }
644    }
645
646    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
647
648    unsigned priorIdx = 0;
649    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
650        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
651        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
652            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
653            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
654            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
655            Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
656            Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
657            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
658            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(instance, priorBlock);
659            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
660            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
661            iBuilder->CreateCondBr(wraparound, copyBack, done);
662            iBuilder->SetInsertPoint(copyBack);
663            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
664            cb->createCopyBack(instance, copyItems);
665            iBuilder->CreateBr(done);
666            iBuilder->SetInsertPoint(done);
667            priorIdx++;
668        }
669        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
670            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
671            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
672            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
673            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
674            Value * accessible = cb->getLinearlyAccessibleItems(instance, priorProduced[priorIdx]);
675            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
676            iBuilder->CreateCondBr(wraparound, copyBack, done);
677            iBuilder->SetInsertPoint(copyBack);
678            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
679            cb->createCopyBack(instance, copyItems);
680            iBuilder->CreateBr(done);
681            iBuilder->SetInsertPoint(done);
682            priorIdx++;
683        }
684    }
685
686    /// Call the do block method if necessary then restore the current function state to the do segment method
687    if (!useIndirectBr()) {
688        iBuilder->CreateRetVoid();
689        mDoBlockMethod = mCurrentMethod;
690        iBuilder->restoreIP(ip);
691        iBuilder->CreateCall(mCurrentMethod, self);
692        mSelf = self;
693        mCurrentMethod = cp;
694    }
695
696}
697
698inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
699
700    Value * const self = mSelf;
701    Function * const cp = mCurrentMethod;
702    Value * const remainingItemCount = remainingItems;
703    auto ip = iBuilder->saveIP();
704
705    if (!useIndirectBr()) {
706        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
707        mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
708        mCurrentMethod->setCallingConv(CallingConv::C);
709        mCurrentMethod->setDoesNotThrow();
710        mCurrentMethod->setDoesNotCapture(1);
711        auto args = mCurrentMethod->arg_begin();
712        mSelf = &*args;
713        mSelf->setName("self");
714        remainingItems = &*(++args);
715        remainingItems->setName("remainingItems");
716        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
717    }
718
719    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
720
721    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
722
723    if (!useIndirectBr()) {
724        iBuilder->CreateRetVoid();       
725        iBuilder->restoreIP(ip);
726        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
727        mCurrentMethod = cp;
728        mSelf = self;
729    }
730
731}
732
733//  The default finalBlock method simply dispatches to the doBlock routine.
734void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
735    CreateDoBlockMethodCall();
736}
737
738void BlockOrientedKernel::CreateDoBlockMethodCall() {
739    if (useIndirectBr()) {
740        BasicBlock * bb = CreateBasicBlock("resume");
741        mStrideLoopBranch->addDestination(bb);
742        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
743        iBuilder->CreateBr(mStrideLoopBody);
744        bb->moveAfter(iBuilder->GetInsertBlock());
745        iBuilder->SetInsertPoint(bb);
746    } else {
747        iBuilder->CreateCall(mDoBlockMethod, mSelf);
748    }
749}
750
751// CONSTRUCTOR
752
753BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
754                                                           std::string && kernelName,
755                                                           std::vector<Binding> && stream_inputs,
756                                                           std::vector<Binding> && stream_outputs,
757                                                           std::vector<Binding> && scalar_parameters,
758                                                           std::vector<Binding> && scalar_outputs,
759                                                           std::vector<Binding> && internal_scalars)
760: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
761, mDoBlockMethod(nullptr)
762, mStrideLoopBody(nullptr)
763, mStrideLoopBranch(nullptr)
764, mStrideLoopTarget(nullptr) {
765
766}
767
768
769// CONSTRUCTOR
770KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
771                             std::string && kernelName,
772                             std::vector<Binding> && stream_inputs,
773                             std::vector<Binding> && stream_outputs,
774                             std::vector<Binding> && scalar_parameters,
775                             std::vector<Binding> && scalar_outputs,
776                             std::vector<Binding> && internal_scalars)
777: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
778, mSelf(nullptr)
779, mCurrentMethod(nullptr)
780, mNoTerminateAttribute(false) {
781
782}
783
784KernelBuilder::~KernelBuilder() { }
785
786// CONSTRUCTOR
787SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
788                                             std::string && kernelName,
789                                             std::vector<Binding> && stream_inputs,
790                                             std::vector<Binding> && stream_outputs,
791                                             std::vector<Binding> && scalar_parameters,
792                                             std::vector<Binding> && scalar_outputs,
793                                             std::vector<Binding> && internal_scalars)
794: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
795
796}
Note: See TracBrowser for help on using the repository browser.