source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5411

Last change on this file since 5411 was 5411, checked in by nmedfort, 2 years ago

Potential bug fix for 32-bit. Modified MRemap to check for Linux OS support. Added MMapAdvise to CBuilder.

File size: 38.4 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <kernels/toolchain.h>
8#include <kernels/streamset.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Function.h>
11#include <llvm/IR/Instructions.h>
12#include <llvm/IR/MDBuilder.h>
13#include <llvm/IR/Module.h>
14#include <llvm/Support/raw_ostream.h>
15#include <llvm/Bitcode/ReaderWriter.h>
16#include <llvm/Transforms/Utils/Local.h>
17#include <kernels/streamset.h>
18#include <sstream>
19
// File-local name fragments used to build the identifiers of kernel functions and kernel state fields.

// Appended to the kernel name to name its internal do-block function.
static const std::string DO_BLOCK_SUFFIX("_DoBlock");

// Presumably the final-block counterpart of DO_BLOCK_SUFFIX (not referenced in this part of the file).
static const std::string FINAL_BLOCK_SUFFIX("_FinalBlock");

// Name of the scalar field holding the kernel's logical segment number.
static const std::string LOGICAL_SEGMENT_NO_SCALAR("logicalSegNo");

// Appended to an input stream set name to name its processed-item-count scalar.
static const std::string PROCESSED_ITEM_COUNT_SUFFIX("_processedItemCount");

// NOTE(review): not referenced in this part of the file; presumably names a consumed-item-count scalar.
static const std::string CONSUMED_ITEM_COUNT_SUFFIX("_consumedItemCount");

// Appended to an output stream set name to name its produced-item-count scalar.
static const std::string PRODUCED_ITEM_COUNT_SUFFIX("_producedItemCount");

// Name of the i1 scalar field that records kernel termination.
static const std::string TERMINATION_SIGNAL("terminationSignal");

// Appended to a stream set name to name the scalar holding its buffer pointer.
static const std::string BUFFER_PTR_SUFFIX("_bufferPtr");

// Appended to an output stream set name to name the scalar holding its consumer state.
static const std::string CONSUMER_LOGICAL_SEGMENT_SUFFIX("_cls");
37
38using namespace llvm;
39using namespace kernel;
40using namespace parabix;
41
42unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
43    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
44        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
45    }
46    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
47        report_fatal_error(getName() + " already contains scalar field " + name);
48    }
49    const auto index = mKernelFields.size();
50    mKernelMap.emplace(name, index);
51    mKernelFields.push_back(type);
52    return index;
53}
54
55unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
56    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
57        report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
58    }
59    const auto index = mKernelFields.size();
60    mKernelFields.push_back(type);
61    return index;
62}
63
64void KernelBuilder::prepareStreamSetNameMap() {
65    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
66        mStreamMap.emplace(mStreamSetInputs[i].name, std::make_pair(Port::Input, i));
67    }
68    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
69        mStreamMap.emplace(mStreamSetOutputs[i].name, std::make_pair(Port::Output, i));
70    }
71}
72   
73void KernelBuilder::prepareKernel() {
74
75    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
76        report_fatal_error("Cannot prepare kernel after kernel state finalized");
77    }
78
79    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
80        std::string tmp;
81        raw_string_ostream out(tmp);
82        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
83            << mStreamSetInputs.size() << " input stream sets.";
84        report_fatal_error(out.str());
85    }
86    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
87        std::string tmp;
88        raw_string_ostream out(tmp);
89        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
90            << mStreamSetOutputs.size() << " output stream sets.";
91        report_fatal_error(out.str());
92    }
93    const auto blockSize = iBuilder->getBitBlockWidth();
94    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
95        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
96            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
97        }
98        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
99        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
100            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
101        }       
102    }
103
104    IntegerType * const sizeTy = iBuilder->getSizeTy();
105    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
106        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
107        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
108            addScalar(sizeTy, mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
109        }
110    }
111    for (const auto binding : mScalarInputs) {
112        addScalar(binding.type, binding.name);
113    }
114    for (const auto binding : mScalarOutputs) {
115        addScalar(binding.type, binding.name);
116    }
117    if (mStreamMap.empty()) {
118        prepareStreamSetNameMap();
119    }
120    for (auto binding : mInternalScalars) {
121        addScalar(binding.type, binding.name);
122    }
123
124    Type * const consumerSetTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
125    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
126        addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_LOGICAL_SEGMENT_SUFFIX);
127    }
128
129    addScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
130    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
131
132    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
133}
134
135Module * KernelBuilder::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
136
137    assert (mStreamSetInputBuffers.empty());
138    assert (mStreamSetOutputBuffers.empty());
139
140    if (LLVM_UNLIKELY(mStreamSetInputs.size() != inputs.size())) {
141        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
142                           " input stream sets but was given "
143                           + std::to_string(mStreamSetInputBuffers.size()));
144    }
145
146    for (unsigned i = 0; i < inputs.size(); ++i) {
147        StreamSetBuffer * const buf = inputs[i];
148        if (LLVM_UNLIKELY(buf == nullptr)) {
149            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
150                               + " cannot be null");
151        }
152        buf->addConsumer(this);
153    }
154
155    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != outputs.size())) {
156        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
157                           + " output stream sets but was given "
158                           + std::to_string(mStreamSetOutputBuffers.size()));
159    }
160
161    for (unsigned i = 0; i < outputs.size(); ++i) {
162        StreamSetBuffer * const buf = outputs[i];
163        if (LLVM_UNLIKELY(buf == nullptr)) {
164            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
165                               + " cannot be null");
166        }
167        if (LLVM_LIKELY(buf->getProducer() == nullptr)) {
168            buf->setProducer(this);
169        } else {
170            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
171                               + " is already produced by kernel " + buf->getProducer()->getName());
172        }
173    }
174
175    std::stringstream cacheName;
176
177    cacheName << getName() << '_' << iBuilder->getBuilderUniqueName();
178    for (const StreamSetBuffer * b: inputs) {
179        cacheName <<  ':' <<  b->getUniqueID();
180    }
181    for (const StreamSetBuffer * b: outputs) {
182        cacheName <<  ':' <<  b->getUniqueID();
183    }
184
185    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
186    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
187
188    prepareKernel();
189
190    Module * const m = new Module(cacheName.str(), iBuilder->getContext());
191    m->setTargetTriple(iBuilder->getModule()->getTargetTriple());
192    return m;
193}
194
195// Default kernel signature: generate the IR and emit as byte code.
196std::string KernelBuilder::generateKernelSignature(std::string moduleId) {
197    if (moduleIDisSignature()) {
198        return moduleId;
199    } else {
200        generateKernel();
201        std::string signature;
202        raw_string_ostream OS(signature);
203        WriteBitcodeToFile(iBuilder->getModule(), OS);
204        return signature;
205    }
206}
207
208void KernelBuilder::generateKernel() {
209    // If the module id cannot uniquely identify this kernel, "generateKernelSignature()" will have already
210    // generated the unoptimized IR.
211    if (!mIsGenerated) {
212        auto saveInstance = getInstance();
213        auto savePoint = iBuilder->saveIP();
214        addKernelDeclarations(iBuilder->getModule());
215        callGenerateInitMethod();
216        callGenerateDoSegmentMethod();       
217        // Implement the accumulator get functions
218        for (auto binding : mScalarOutputs) {
219            Function * f = getAccumulatorFunction(binding.name);
220            iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
221            Value * self = &*(f->arg_begin());
222            Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
223            Value * retVal = iBuilder->CreateLoad(ptr);
224            iBuilder->CreateRet(retVal);
225        }
226        callGenerateTerminateMethod();
227        iBuilder->restoreIP(savePoint);
228        setInstance(saveInstance);
229        mIsGenerated = true;       
230    }
231}
232
233void KernelBuilder::callGenerateDoSegmentMethod() {
234    mCurrentMethod = getDoSegmentFunction();
235    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
236    auto args = mCurrentMethod->arg_begin();
237    setInstance(&*(args++));
238    Value * doFinal = &*(args++);
239    std::vector<Value *> producerPos;
240    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
241        producerPos.push_back(&*(args++));
242    }
243    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
244    iBuilder->CreateRetVoid();
245}
246
247void KernelBuilder::callGenerateInitMethod() {
248    mCurrentMethod = getInitFunction();
249    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
250    Function::arg_iterator args = mCurrentMethod->arg_begin();
251    setInstance(&*(args++));
252    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), getInstance());
253    for (auto binding : mScalarInputs) {
254        setScalarField(binding.name, &*(args++));
255    }
256    for (auto binding : mStreamSetOutputs) {
257        setConsumerState(binding.name, &*(args++));
258    }
259    generateInitMethod();
260    iBuilder->CreateRetVoid();
261}
262
263void KernelBuilder::callGenerateTerminateMethod() {
264    mCurrentMethod = getTerminateFunction();
265    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
266    auto args = mCurrentMethod->arg_begin();
267    setInstance(&*(args++));
268    generateTerminateMethod(); // may be overridden by the KernelBuilder subtype
269    iBuilder->CreateRetVoid();
270}
271
272ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
273    const auto f = mKernelMap.find(name);
274    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
275        report_fatal_error(getName() + " does not contain scalar: " + name);
276    }
277    return iBuilder->getInt32(f->second);
278}
279
280Value * KernelBuilder::getProducedItemCount(const std::string & name, Value * doFinal) const {
281    Port port; unsigned ssIdx;
282    std::tie(port, ssIdx) = getStreamPort(name);
283    assert (port == Port::Output);
284    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
285        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
286        std::string principalField;
287        if (refSet.empty()) {
288            if (mStreamSetInputs.empty()) {
289                principalField = mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX;
290            } else {
291                principalField = mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
292            }
293        } else {
294            Port port; unsigned pfIndex;
295            std::tie(port, pfIndex) = getStreamPort(refSet);
296            if (port == Port::Input) {
297               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
298            } else {
299               principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
300            }
301        }
302        Value * principalItemsProcessed = getScalarField(principalField);
303        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
304    }
305    return getScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX);
306}
307
308llvm::Value * KernelBuilder::getAvailableItemCount(const std::string & name) const {
309    auto arg = mCurrentMethod->arg_begin();
310    ++arg; // self
311    ++arg; // doFinal
312    for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
313        if (mStreamSetInputs[i].name == name) {
314            return &*arg;
315        }
316        ++arg;
317    }
318    return nullptr;
319}
320
321Value * KernelBuilder::getProcessedItemCount(const std::string & name) const {
322    Port port; unsigned ssIdx;
323    std::tie(port, ssIdx) = getStreamPort(name);
324    assert (port == Port::Input);
325    if (mStreamSetInputs[ssIdx].rate.isExact()) {
326        std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
327        if (refSet.empty()) {
328            refSet = mStreamSetInputs[0].name;
329        }
330        Value * principalItemsProcessed = getScalarField(refSet + PROCESSED_ITEM_COUNT_SUFFIX);
331        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
332    }
333    return getScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX);
334}
335
336void KernelBuilder::setProducedItemCount(const std::string & name, Value * value) const {
337    setScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX, value);
338}
339
340void KernelBuilder::setProcessedItemCount(const std::string & name, Value * value) const {
341    setScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX, value);
342}
343
344Value * KernelBuilder::getTerminationSignal() const {
345    return getScalarField(TERMINATION_SIGNAL);
346}
347
348void KernelBuilder::setTerminationSignal() const {
349    setScalarField(TERMINATION_SIGNAL, iBuilder->getTrue());
350}
351
352LoadInst * KernelBuilder::acquireLogicalSegmentNo() const {
353    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(getInstance(), LOGICAL_SEGMENT_NO_SCALAR));
354}
355
356void KernelBuilder::releaseLogicalSegmentNo(Value * nextSegNo) const {
357    iBuilder->CreateAtomicStoreRelease(nextSegNo, getScalarFieldPtr(getInstance(), LOGICAL_SEGMENT_NO_SCALAR));
358}
359
360llvm::Value * KernelBuilder::getConsumerState(const std::string & name) const {
361    return getScalarField(name + CONSUMER_LOGICAL_SEGMENT_SUFFIX);
362}
363
364void KernelBuilder::setConsumerState(const std::string & name, llvm::Value * value) const {
365    setScalarField(name + CONSUMER_LOGICAL_SEGMENT_SUFFIX, value);
366}
367
368inline Value * KernelBuilder::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
369    for (const Binding & b : bindings) {
370        if (b.name == name) {
371            const auto divisor = iBuilder->getBitBlockWidth();
372            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
373                return iBuilder->CreateLShr(itemCount, std::log2(divisor));
374            } else {
375                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
376            }
377        }
378    }
379    report_fatal_error("Error: no binding in " + getName() + " for " + name);
380}
381
382Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
383    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
384    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
385    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
386}
387
388Value * KernelBuilder::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
389    return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
390}
391
392Value * KernelBuilder::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
393    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
394    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
395    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
396}
397
398Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
399    return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
400}
401
402llvm::Value * KernelBuilder::getInputStreamSetCount(const std::string & name) const {
403    return getInputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
404}
405
406llvm::Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
407    Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
408    blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
409    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
410    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
411}
412
413Value * KernelBuilder::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
414    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
415    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
416    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
417}
418
419void KernelBuilder::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
420    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
421}
422
423Value * KernelBuilder::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
424    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
425    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
426    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
427}
428
429void KernelBuilder::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
430    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
431}
432
433llvm::Value * KernelBuilder::getOutputStreamSetCount(const std::string & name) const {
434    return getOutputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
435}
436
437Value * KernelBuilder::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
438    return getInputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
439}
440
441Value * KernelBuilder::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
442    return getOutputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
443}
444
445void KernelBuilder::setBaseAddress(const std::string & name, llvm::Value * addr) const {
446    unsigned index; Port port;
447    std::tie(port, index) = getStreamPort(name);
448    const StreamSetBuffer * buf = nullptr;
449    if (port == Port::Input) {
450        assert (index < mStreamSetInputBuffers.size());
451        buf = mStreamSetInputBuffers[index];
452    } else {
453        assert (index < mStreamSetOutputBuffers.size());
454        buf = mStreamSetOutputBuffers[index];
455    }
456    return buf->setBaseAddress(getStreamSetBufferPtr(name), addr);
457}
458
459Value * KernelBuilder::getBufferedSize(const std::string & name) const {
460    unsigned index; Port port;
461    std::tie(port, index) = getStreamPort(name);
462    const StreamSetBuffer * buf = nullptr;
463    if (port == Port::Input) {
464        assert (index < mStreamSetInputBuffers.size());
465        buf = mStreamSetInputBuffers[index];
466    } else {
467        assert (index < mStreamSetOutputBuffers.size());
468        buf = mStreamSetOutputBuffers[index];
469    }
470    return buf->getBufferedSize(getStreamSetBufferPtr(name));
471}
472
473void KernelBuilder::setBufferedSize(const std::string & name, Value * size) const {
474    unsigned index; Port port;
475    std::tie(port, index) = getStreamPort(name);
476    const StreamSetBuffer * buf = nullptr;
477    if (port == Port::Input) {
478        assert (index < mStreamSetInputBuffers.size());
479        buf = mStreamSetInputBuffers[index];
480    } else {
481        assert (index < mStreamSetOutputBuffers.size());
482        buf = mStreamSetOutputBuffers[index];
483    }
484    buf->setBufferedSize(getStreamSetBufferPtr(name), size);
485}
486
487void KernelBuilder::reserveBytes(const std::string & name, llvm::Value * value) const {
488    Value * itemCount = getProducedItemCount(name);
489    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
490    buf->reserveBytes(getStreamSetBufferPtr(name), iBuilder->CreateAdd(itemCount, value));
491}
492
493KernelBuilder::StreamPort KernelBuilder::getStreamPort(const std::string & name) const {
494    const auto f = mStreamMap.find(name);
495    if (LLVM_UNLIKELY(f == mStreamMap.end())) {
496        report_fatal_error(getName() + " does not contain stream set: " + name);
497    }
498    return f->second;
499}
500
501Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
502    return getScalarField(name + BUFFER_PTR_SUFFIX);
503}
504
505Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
506    for (auto & arg : f->getArgumentList()) {
507        if (arg.getName().equals(name)) {
508            return &arg;
509        }
510    }
511    report_fatal_error(getName() + " does not have parameter " + name);
512}
513
514CallInst * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
515    assert (getDoSegmentFunction()->getArgumentList().size() == args.size());
516    return iBuilder->CreateCall(getDoSegmentFunction(), args);
517}
518
519CallInst * KernelBuilder::createGetAccumulatorCall(const std::string & accumName) const {
520    return iBuilder->CreateCall(getAccumulatorFunction(accumName), { getInstance() });
521}
522
523BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
524    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
525}
526
527Value * KernelBuilder::createInstance() {
528    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
529        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
530    }
531    setInstance(iBuilder->CreateCacheAlignedAlloca(mKernelStateType));
532    return getInstance();
533}
534
535void KernelBuilder::initializeInstance() {
536
537
538    if (LLVM_UNLIKELY(getInstance() == nullptr)) {
539        report_fatal_error("Cannot initialize " + getName() + " before calling createInstance()");
540    }
541    std::vector<Value *> args;
542    args.reserve(1 + mInitialArguments.size() + mStreamSetInputBuffers.size() + (mStreamSetOutputBuffers.size() * 2));
543    args.push_back(getInstance());
544    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
545        Value * arg = mInitialArguments[i];
546        if (LLVM_UNLIKELY(arg == nullptr)) {
547            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
548                               + " cannot be null when calling createInstance()");
549        }
550        args.push_back(arg);
551    }
552    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
553        assert (mStreamSetInputBuffers[i]);
554        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
555        if (LLVM_UNLIKELY(arg == nullptr)) {
556            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
557                               + " was not allocated prior to calling createInstance()");
558        }
559        args.push_back(arg);
560    }
561    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
562    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
563        assert (mStreamSetOutputBuffers[i]);
564        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
565        if (LLVM_UNLIKELY(arg == nullptr)) {
566            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
567                               + " was not allocated prior to calling createInstance()");
568        }
569        args.push_back(arg);
570    }
571    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
572
573    IntegerType * const sizeTy = iBuilder->getSizeTy();
574    PointerType * const sizePtrTy = sizeTy->getPointerTo();
575    PointerType * const sizePtrPtrTy = sizePtrTy->getPointerTo();
576    StructType * const consumerTy = StructType::get(sizeTy, sizePtrPtrTy, nullptr);
577    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
578        const auto & consumers = mStreamSetOutputBuffers[i]->getConsumers();
579        const auto n = consumers.size();
580        AllocaInst * const outputConsumers = iBuilder->CreateAlloca(consumerTy);
581        Value * const consumerSegNoArray = iBuilder->CreateAlloca(ArrayType::get(sizePtrTy, n));
582        for (unsigned i = 0; i < n; ++i) {
583            KernelBuilder * const consumer = consumers[i];
584            assert (consumer->getInstance());
585            Value * const segNo = consumer->getScalarFieldPtr(consumer->getInstance(), LOGICAL_SEGMENT_NO_SCALAR);
586            iBuilder->CreateStore(segNo, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
587        }
588        Value * const consumerCountPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
589        iBuilder->CreateStore(iBuilder->getSize(n), consumerCountPtr);
590        Value * const consumerSegNoArrayPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
591        iBuilder->CreateStore(iBuilder->CreatePointerCast(consumerSegNoArray, sizePtrPtrTy), consumerSegNoArrayPtr);
592        args.push_back(outputConsumers);
593    }
594    iBuilder->CreateCall(getInitFunction(), args);
595}
596
597void KernelBuilder::terminateInstance() {
598    iBuilder->CreateCall(getTerminateFunction(), { getInstance() });
599}
600
601//  The default doSegment method dispatches to the doBlock routine for
602//  each block of the given number of blocksToDo, and then updates counts.
603
604void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
605
606    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
607    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
608    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
609    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
610    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
611    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
612
613    Value * baseTarget = nullptr;
614    if (useIndirectBr()) {
615        baseTarget = iBuilder->CreateSelect(doFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
616    }
617
618    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
619    Value * availablePos = producerPos[0];
620    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
621    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
622    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
623
624    iBuilder->CreateBr(strideLoopCond);
625
626    iBuilder->SetInsertPoint(strideLoopCond);
627
628    PHINode * branchTarget = nullptr;
629    if (useIndirectBr()) {
630        branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
631        branchTarget->addIncoming(baseTarget, entryBlock);
632    }
633
634    PHINode * const stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
635    stridesRemaining->addIncoming(stridesToDo, entryBlock);
636    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
637    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
638    Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
639    iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
640
641    iBuilder->SetInsertPoint(mStrideLoopBody);
642
643    if (useIndirectBr()) {
644        mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
645        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
646    }
647
648    /// GENERATE DO BLOCK METHOD
649
650    writeDoBlockMethod();
651
652    /// UPDATE PROCESSED COUNTS
653
654    processed = getProcessedItemCount(mStreamSetInputs[0].name);
655    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
656    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
657
658    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
659
660    BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
661    if (useIndirectBr()) {
662        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
663    }
664    iBuilder->CreateBr(strideLoopCond);
665
666    stridesDone->moveAfter(bodyEnd);
667
668    iBuilder->SetInsertPoint(stridesDone);
669
670    // Now conditionally perform the final block processing depending on the doFinal parameter.
671    if (useIndirectBr()) {
672        mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
673        mStrideLoopBranch->addDestination(doFinalBlock);
674        mStrideLoopBranch->addDestination(segmentDone);
675    } else {
676        iBuilder->CreateUnlikelyCondBr(doFinal, doFinalBlock, segmentDone);
677    }
678
679    doFinalBlock->moveAfter(stridesDone);
680
681    iBuilder->SetInsertPoint(doFinalBlock);
682
683    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
684    writeFinalBlockMethod(remainingItems);
685
686    itemsDone = producerPos[0];
687    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
688    setTerminationSignal();
689    iBuilder->CreateBr(segmentDone);
690
691    segmentDone->moveAfter(iBuilder->GetInsertBlock());
692
693    iBuilder->SetInsertPoint(segmentDone);
694
695    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
696    if (useIndirectBr()) {
697        MDBuilder mdb(iBuilder->getContext());
698        const auto destinations = mStrideLoopBranch->getNumDestinations();
699        uint32_t weights[destinations];
700        for (unsigned i = 0; i < destinations; ++i) {
701            weights[i] = (mStrideLoopBranch->getDestination(i) == segmentDone) ? 100 : 1;
702        }
703        ArrayRef<uint32_t> bw(weights, destinations);
704        mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
705    }
706
707}
708
709inline void BlockOrientedKernel::writeDoBlockMethod() {
710
711    Value * const self = getInstance();
712    Function * const cp = mCurrentMethod;
713    auto ip = iBuilder->saveIP();
714
715    /// Check if the do block method is called and create the function if necessary   
716    if (!useIndirectBr()) {
717        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType()}, false);
718        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
719        mCurrentMethod->setCallingConv(CallingConv::C);
720        mCurrentMethod->setDoesNotThrow();
721        mCurrentMethod->setDoesNotCapture(1);
722        auto args = mCurrentMethod->arg_begin();
723        mCurrentMethod = mCurrentMethod;
724        args->setName("self");
725        setInstance(&*args);
726        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
727    }
728
729    std::vector<Value *> priorProduced;
730    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
731        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
732            priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
733        }
734    }
735
736    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
737
738    unsigned priorIdx = 0;
739    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
740        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
741        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
742            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
743            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
744            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
745            Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
746            Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
747            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
748            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(instance, priorBlock);
749            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
750            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
751            iBuilder->CreateCondBr(wraparound, copyBack, done);
752            iBuilder->SetInsertPoint(copyBack);
753            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
754            cb->createCopyBack(instance, copyItems);
755            iBuilder->CreateBr(done);
756            iBuilder->SetInsertPoint(done);
757            priorIdx++;
758        }
759        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
760            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
761            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
762            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
763            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
764            Value * accessible = cb->getLinearlyAccessibleItems(instance, priorProduced[priorIdx]);
765            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
766            iBuilder->CreateCondBr(wraparound, copyBack, done);
767            iBuilder->SetInsertPoint(copyBack);
768            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
769            cb->createCopyBack(instance, copyItems);
770            iBuilder->CreateBr(done);
771            iBuilder->SetInsertPoint(done);
772            priorIdx++;
773        }
774    }
775
776    /// Call the do block method if necessary then restore the current function state to the do segement method
777    if (!useIndirectBr()) {
778        iBuilder->CreateRetVoid();
779        mDoBlockMethod = mCurrentMethod;
780        iBuilder->restoreIP(ip);
781        iBuilder->CreateCall(mCurrentMethod, self);
782        setInstance(self);
783        mCurrentMethod = cp;
784    }
785
786}
787
788inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
789
790    Value * const self = getInstance();
791    Function * const cp = mCurrentMethod;
792    Value * const remainingItemCount = remainingItems;
793    auto ip = iBuilder->saveIP();
794
795    if (!useIndirectBr()) {
796        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType(), iBuilder->getSizeTy()}, false);
797        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
798        mCurrentMethod->setCallingConv(CallingConv::C);
799        mCurrentMethod->setDoesNotThrow();
800        mCurrentMethod->setDoesNotCapture(1);
801        auto args = mCurrentMethod->arg_begin();
802        args->setName("self");
803        setInstance(&*args);
804        remainingItems = &*(++args);
805        remainingItems->setName("remainingItems");
806        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
807    }
808
809    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
810
811    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
812
813    if (!useIndirectBr()) {
814        iBuilder->CreateRetVoid();       
815        iBuilder->restoreIP(ip);
816        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
817        mCurrentMethod = cp;
818        setInstance(self);
819    }
820
821}
822
823//  The default finalBlock method simply dispatches to the doBlock routine.
824void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
825    CreateDoBlockMethodCall();
826}
827
828void BlockOrientedKernel::CreateDoBlockMethodCall() {
829    if (useIndirectBr()) {
830        BasicBlock * bb = CreateBasicBlock("resume");
831        mStrideLoopBranch->addDestination(bb);
832        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
833        iBuilder->CreateBr(mStrideLoopBody);
834        bb->moveAfter(iBuilder->GetInsertBlock());
835        iBuilder->SetInsertPoint(bb);
836    } else {
837        iBuilder->CreateCall(mDoBlockMethod, getInstance());
838    }
839}
840
841
842// CONSTRUCTOR
843KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
844                             std::string && kernelName,
845                             std::vector<Binding> && stream_inputs,
846                             std::vector<Binding> && stream_outputs,
847                             std::vector<Binding> && scalar_parameters,
848                             std::vector<Binding> && scalar_outputs,
849                             std::vector<Binding> && internal_scalars)
850: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
851, mCurrentMethod(nullptr)
852, mNoTerminateAttribute(false)
853, mIsGenerated(false) {
854
855}
856
857KernelBuilder::~KernelBuilder() {
858
859}
860
861// CONSTRUCTOR
862BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
863                                         std::string && kernelName,
864                                         std::vector<Binding> && stream_inputs,
865                                         std::vector<Binding> && stream_outputs,
866                                         std::vector<Binding> && scalar_parameters,
867                                         std::vector<Binding> && scalar_outputs,
868                                         std::vector<Binding> && internal_scalars)
869: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
870, mDoBlockMethod(nullptr)
871, mStrideLoopBody(nullptr)
872, mStrideLoopBranch(nullptr)
873, mStrideLoopTarget(nullptr) {
874
875}
876
877// CONSTRUCTOR
878SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
879                                             std::string && kernelName,
880                                             std::vector<Binding> && stream_inputs,
881                                             std::vector<Binding> && stream_outputs,
882                                             std::vector<Binding> && scalar_parameters,
883                                             std::vector<Binding> && scalar_outputs,
884                                             std::vector<Binding> && internal_scalars)
885: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
886
887}
Note: See TracBrowser for help on using the repository browser.