source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5418

Last change on this file since 5418 was 5418, checked in by nmedfort, 2 years ago

Removed non-functional CUDA code from icgrep and consolidated grep and multigrep mode into a single function; allowed segment parallel pipeline to utilize process as its initial thread; modified MMapSourceKernel to map and perform mmap directly and advise the OS to drop consumed data streams.

File size: 39.0 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <kernels/toolchain.h>
8#include <kernels/streamset.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Function.h>
11#include <llvm/IR/Instructions.h>
12#include <llvm/IR/MDBuilder.h>
13#include <llvm/IR/Module.h>
14#include <llvm/Support/raw_ostream.h>
15#include <llvm/Bitcode/ReaderWriter.h>
16#include <llvm/Transforms/Utils/Local.h>
17#include <kernels/streamset.h>
18#include <sstream>
19
// File-local names and suffixes used when composing kernel function names and
// the names of scalar fields registered in the kernel state.
namespace {

// Suffixes for the per-block helper function names.
const std::string DO_BLOCK_SUFFIX = "_DoBlock";
const std::string FINAL_BLOCK_SUFFIX = "_FinalBlock";

// Scalar holding the logical segment number of a kernel instance.
const std::string LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";

// Suffixes appended to a stream set name to form its item-count scalars.
const std::string PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";
const std::string CONSUMED_ITEM_COUNT_SUFFIX = "_consumedItemCount";
const std::string PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";

// Scalar holding the termination flag of a kernel instance.
const std::string TERMINATION_SIGNAL = "terminationSignal";

// Suffix appended to a stream set name to form its buffer-pointer scalar.
const std::string BUFFER_PTR_SUFFIX = "_bufferPtr";

// Suffix appended to an output stream set name to form its consumer-state scalar.
const std::string CONSUMER_SUFFIX = "_cls";

} // end of anonymous namespace
37
38using namespace llvm;
39using namespace kernel;
40using namespace parabix;
41
42unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
43    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
44        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
45    }
46    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
47        report_fatal_error(getName() + " already contains scalar field " + name);
48    }
49    const auto index = mKernelFields.size();
50    mKernelMap.emplace(name, index);
51    mKernelFields.push_back(type);
52    return index;
53}
54
55unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
56    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
57        report_fatal_error("Cannot add unnamed field  to " + getName() + " after kernel state finalized");
58    }
59    const auto index = mKernelFields.size();
60    mKernelFields.push_back(type);
61    return index;
62}
63
64void KernelBuilder::prepareStreamSetNameMap() {
65    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
66        mStreamMap.emplace(mStreamSetInputs[i].name, std::make_pair(Port::Input, i));
67    }
68    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
69        mStreamMap.emplace(mStreamSetOutputs[i].name, std::make_pair(Port::Output, i));
70    }
71}
72   
73void KernelBuilder::prepareKernel() {
74    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
75        report_fatal_error("Cannot prepare kernel after kernel state finalized");
76    }
77    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
78        std::string tmp;
79        raw_string_ostream out(tmp);
80        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
81            << mStreamSetInputs.size() << " input stream sets.";
82        report_fatal_error(out.str());
83    }
84    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
85        std::string tmp;
86        raw_string_ostream out(tmp);
87        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
88            << mStreamSetOutputs.size() << " output stream sets.";
89        report_fatal_error(out.str());
90    }
91    const auto blockSize = iBuilder->getBitBlockWidth();
92    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
93        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
94            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
95        }
96        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
97        if ((i == 0) || mStreamSetInputs[i].rate.isUnknown()) {
98            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
99        }       
100    }
101
102    IntegerType * const sizeTy = iBuilder->getSizeTy();
103    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
104        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
105        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
106            addScalar(sizeTy, mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
107        }
108    }
109    for (const auto binding : mScalarInputs) {
110        addScalar(binding.type, binding.name);
111    }
112    for (const auto binding : mScalarOutputs) {
113        addScalar(binding.type, binding.name);
114    }
115    if (mStreamMap.empty()) {
116        prepareStreamSetNameMap();
117    }
118    for (auto binding : mInternalScalars) {
119        addScalar(binding.type, binding.name);
120    }
121
122    Type * const consumerSetTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
123    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
124        addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_SUFFIX);
125    }
126
127    addScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
128    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
129
130    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
131        addScalar(sizeTy, mStreamSetOutputs[i].name + CONSUMED_ITEM_COUNT_SUFFIX);
132    }
133
134    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
135}
136
137void KernelBuilder::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
138
139    assert (mModule == nullptr);
140    assert (mStreamSetInputBuffers.empty());
141    assert (mStreamSetOutputBuffers.empty());
142
143    if (LLVM_UNLIKELY(mStreamSetInputs.size() != inputs.size())) {
144        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
145                           " input stream sets but was given "
146                           + std::to_string(mStreamSetInputBuffers.size()));
147    }
148
149    for (unsigned i = 0; i < inputs.size(); ++i) {
150        StreamSetBuffer * const buf = inputs[i];
151        if (LLVM_UNLIKELY(buf == nullptr)) {
152            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
153                               + " cannot be null");
154        }
155        buf->addConsumer(this);
156    }
157
158    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != outputs.size())) {
159        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
160                           + " output stream sets but was given "
161                           + std::to_string(mStreamSetOutputBuffers.size()));
162    }
163
164    for (unsigned i = 0; i < outputs.size(); ++i) {
165        StreamSetBuffer * const buf = outputs[i];
166        if (LLVM_UNLIKELY(buf == nullptr)) {
167            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
168                               + " cannot be null");
169        }
170        if (LLVM_LIKELY(buf->getProducer() == nullptr)) {
171            buf->setProducer(this);
172        } else {
173            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
174                               + " is already produced by kernel " + buf->getProducer()->getName());
175        }
176    }
177
178    std::stringstream cacheName;
179
180    cacheName << getName() << '_' << iBuilder->getBuilderUniqueName();
181    for (const StreamSetBuffer * b: inputs) {
182        cacheName <<  ':' <<  b->getUniqueID();
183    }
184    for (const StreamSetBuffer * b: outputs) {
185        cacheName <<  ':' <<  b->getUniqueID();
186    }
187
188    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
189    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
190
191    prepareKernel();
192
193    mModule = new Module(cacheName.str(), iBuilder->getContext());
194    mModule->setTargetTriple(iBuilder->getModule()->getTargetTriple());
195}
196
197// Default kernel signature: generate the IR and emit as byte code.
198std::string KernelBuilder::generateKernelSignature(std::string moduleId) {
199    if (moduleIDisSignature()) {
200        return moduleId;
201    } else {
202        generateKernel();
203        std::string signature;
204        raw_string_ostream OS(signature);
205        WriteBitcodeToFile(iBuilder->getModule(), OS);
206        return signature;
207    }
208}
209
210void KernelBuilder::generateKernel() {
211    // If the module id cannot uniquely identify this kernel, "generateKernelSignature()" will have already
212    // generated the unoptimized IR.
213    if (!mIsGenerated) {
214        auto saveInstance = getInstance();
215        auto savePoint = iBuilder->saveIP();
216        addKernelDeclarations(iBuilder->getModule());
217        callGenerateInitializeMethod();
218        callGenerateDoSegmentMethod();       
219        callGenerateFinalizeMethod();
220        iBuilder->restoreIP(savePoint);
221        setInstance(saveInstance);
222        mIsGenerated = true;       
223    }
224}
225
226inline void KernelBuilder::callGenerateInitializeMethod() {
227    mCurrentMethod = getInitFunction();
228    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
229    Function::arg_iterator args = mCurrentMethod->arg_begin();
230    setInstance(&*(args++));
231    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), getInstance());
232    for (auto binding : mScalarInputs) {
233        setScalarField(binding.name, &*(args++));
234    }
235    for (auto binding : mStreamSetOutputs) {
236        setConsumerState(binding.name, &*(args++));
237    }
238    generateInitializeMethod();
239    iBuilder->CreateRetVoid();
240}
241
242inline void KernelBuilder::callGenerateDoSegmentMethod() {
243    mCurrentMethod = getDoSegmentFunction();
244    BasicBlock * const entry = CreateBasicBlock(getName() + "_entry");
245    iBuilder->SetInsertPoint(entry);
246    auto args = mCurrentMethod->arg_begin();
247    setInstance(&*(args++));
248    mIsFinal = &*(args++);
249    const auto n = mStreamSetInputs.size();
250    mAvailableItemCount.resize(n, nullptr);
251    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
252        mAvailableItemCount[i] = &*(args++);
253    }
254    generateDoSegmentMethod(); // must be overridden by the KernelBuilder subtype
255    mIsFinal = nullptr;
256    mAvailableItemCount.clear();
257    iBuilder->CreateRetVoid();
258}
259
260inline void KernelBuilder::callGenerateFinalizeMethod() {
261    mCurrentMethod = getTerminateFunction();
262    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
263    auto args = mCurrentMethod->arg_begin();
264    setInstance(&*(args++));
265    generateFinalizeMethod(); // may be overridden by the KernelBuilder subtype
266    const auto n = mScalarOutputs.size();
267    if (n == 0) {
268        iBuilder->CreateRetVoid();
269    } else {
270        Value * outputs[n];
271        for (unsigned i = 0; i < n; ++i) {
272            outputs[i] = getScalarField(mScalarOutputs[i].name);
273        }
274        if (n == 1) {
275            iBuilder->CreateRet(outputs[0]);
276        } else {
277            iBuilder->CreateAggregateRet(outputs, n);
278        }
279    }
280}
281
282ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
283    const auto f = mKernelMap.find(name);
284    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
285        report_fatal_error(getName() + " does not contain scalar: " + name);
286    }
287    return iBuilder->getInt32(f->second);
288}
289
290Value * KernelBuilder::getProducedItemCount(const std::string & name, Value * doFinal) const {
291    Port port; unsigned ssIdx;
292    std::tie(port, ssIdx) = getStreamPort(name);
293    assert (port == Port::Output);
294    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
295        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
296        std::string principalField;
297        if (refSet.empty()) {
298            if (mStreamSetInputs.empty()) {
299                principalField = mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX;
300            } else {
301                principalField = mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
302            }
303        } else {
304            Port port; unsigned pfIndex;
305            std::tie(port, pfIndex) = getStreamPort(refSet);
306            if (port == Port::Input) {
307               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
308            } else {
309               principalField = refSet + PRODUCED_ITEM_COUNT_SUFFIX;
310            }
311        }
312        Value * principalItemsProcessed = getScalarField(principalField);
313        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed, doFinal);
314    }
315    return getScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX);
316}
317
318llvm::Value * KernelBuilder::getAvailableItemCount(const std::string & name) const {
319    for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
320        if (mStreamSetInputs[i].name == name) {
321            return mAvailableItemCount[i];
322        }
323    }
324    return nullptr;
325}
326
327Value * KernelBuilder::getProcessedItemCount(const std::string & name) const {
328    Port port; unsigned ssIdx;
329    std::tie(port, ssIdx) = getStreamPort(name);
330    assert (port == Port::Input);
331    if (mStreamSetInputs[ssIdx].rate.isExact()) {
332        std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
333        if (refSet.empty()) {
334            refSet = mStreamSetInputs[0].name;
335        }
336        Value * principalItemsProcessed = getScalarField(refSet + PROCESSED_ITEM_COUNT_SUFFIX);
337        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
338    }
339    return getScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX);
340}
341
342Value * KernelBuilder::getConsumedItemCount(const std::string & name) const {
343    return getScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX);
344}
345
346void KernelBuilder::setProducedItemCount(const std::string & name, Value * value) const {
347    setScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX, value);
348}
349
350void KernelBuilder::setProcessedItemCount(const std::string & name, Value * value) const {
351    setScalarField(name + PROCESSED_ITEM_COUNT_SUFFIX, value);
352}
353
354void KernelBuilder::setConsumedItemCount(const std::string & name, Value * value) const {
355    setScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX, value);
356}
357
358Value * KernelBuilder::getTerminationSignal() const {
359    return getScalarField(TERMINATION_SIGNAL);
360}
361
362void KernelBuilder::setTerminationSignal() const {
363    setScalarField(TERMINATION_SIGNAL, iBuilder->getTrue());
364}
365
366LoadInst * KernelBuilder::acquireLogicalSegmentNo() const {
367    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(getInstance(), LOGICAL_SEGMENT_NO_SCALAR));
368}
369
370void KernelBuilder::releaseLogicalSegmentNo(Value * nextSegNo) const {
371    iBuilder->CreateAtomicStoreRelease(nextSegNo, getScalarFieldPtr(getInstance(), LOGICAL_SEGMENT_NO_SCALAR));
372}
373
374llvm::Value * KernelBuilder::getConsumerState(const std::string & name) const {
375    return getScalarField(name + CONSUMER_SUFFIX);
376}
377
378void KernelBuilder::setConsumerState(const std::string & name, llvm::Value * value) const {
379    setScalarField(name + CONSUMER_SUFFIX, value);
380}
381
382inline Value * KernelBuilder::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
383    for (const Binding & b : bindings) {
384        if (b.name == name) {
385            const auto divisor = iBuilder->getBitBlockWidth();
386            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
387                return iBuilder->CreateLShr(itemCount, std::log2(divisor));
388            } else {
389                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
390            }
391        }
392    }
393    report_fatal_error("Error: no binding in " + getName() + " for " + name);
394}
395
396Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
397    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
398    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
399    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
400}
401
402Value * KernelBuilder::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
403    return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
404}
405
406Value * KernelBuilder::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
407    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
408    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
409    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, true);
410}
411
412Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
413    return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
414}
415
416llvm::Value * KernelBuilder::getInputStreamSetCount(const std::string & name) const {
417    return getInputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
418}
419
420llvm::Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
421    Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
422    blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
423    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
424    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, true);
425}
426
427Value * KernelBuilder::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
428    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
429    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
430    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, false);
431}
432
433void KernelBuilder::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
434    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
435}
436
437Value * KernelBuilder::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
438    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
439    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
440    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex, false);
441}
442
443void KernelBuilder::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
444    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
445}
446
447llvm::Value * KernelBuilder::getOutputStreamSetCount(const std::string & name) const {
448    return getOutputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
449}
450
451Value * KernelBuilder::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
452    return getInputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
453}
454
455Value * KernelBuilder::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
456    return getOutputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
457}
458
459Value * KernelBuilder::getBaseAddress(const std::string & name) const {
460    return getAnyStreamSetBuffer(name)->getBaseAddress(getStreamSetBufferPtr(name));
461}
462
463void KernelBuilder::setBaseAddress(const std::string & name, Value * const addr) const {
464    return getAnyStreamSetBuffer(name)->setBaseAddress(getStreamSetBufferPtr(name), addr);
465}
466
467Value * KernelBuilder::getBufferedSize(const std::string & name) const {
468    return getAnyStreamSetBuffer(name)->getBufferedSize(getStreamSetBufferPtr(name));
469}
470
471void KernelBuilder::setBufferedSize(const std::string & name, Value * size) const {
472    unsigned index; Port port;
473    std::tie(port, index) = getStreamPort(name);
474    const StreamSetBuffer * buf = nullptr;
475    if (port == Port::Input) {
476        assert (index < mStreamSetInputBuffers.size());
477        buf = mStreamSetInputBuffers[index];
478    } else {
479        assert (index < mStreamSetOutputBuffers.size());
480        buf = mStreamSetOutputBuffers[index];
481    }
482    buf->setBufferedSize(getStreamSetBufferPtr(name), size);
483}
484
485void KernelBuilder::reserveBytes(const std::string & name, llvm::Value * value) const {
486    Value * itemCount = getProducedItemCount(name);
487    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
488    buf->reserveBytes(getStreamSetBufferPtr(name), iBuilder->CreateAdd(itemCount, value));
489}
490
491Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
492    return getScalarField(name + BUFFER_PTR_SUFFIX);
493}
494
495Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
496    for (auto & arg : f->getArgumentList()) {
497        if (arg.getName().equals(name)) {
498            return &arg;
499        }
500    }
501    report_fatal_error(getName() + " does not have parameter " + name);
502}
503
504CallInst * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
505    Function * const doSegment = getDoSegmentFunction();
506    assert (doSegment->getArgumentList().size() == args.size());
507    return iBuilder->CreateCall(doSegment, args);
508}
509
510Value * KernelBuilder::getAccumulator(const std::string & accumName) const {
511    if (LLVM_UNLIKELY(mOutputScalarResult == nullptr)) {
512        report_fatal_error("Cannot get accumulator " + accumName + " until " + getName() + " has terminated.");
513    }
514    const auto n = mScalarOutputs.size();
515    if (LLVM_UNLIKELY(n == 0)) {
516        report_fatal_error(getName() + " has no output scalars.");
517    } else {
518        for (unsigned i = 0; i < n; ++i) {
519            const Binding & b = mScalarOutputs[i];
520            if (b.name == accumName) {
521                if (n == 1) {
522                    return mOutputScalarResult;
523                } else {
524                    return iBuilder->CreateExtractValue(mOutputScalarResult, {i});
525                }
526            }
527        }
528        report_fatal_error(getName() + " has no output scalar named " + accumName);
529    }
530}
531
532BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
533    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentMethod);
534}
535
536Value * KernelBuilder::createInstance() {
537    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
538        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
539    }
540    setInstance(iBuilder->CreateCacheAlignedAlloca(mKernelStateType));
541    return getInstance();
542}
543
544void KernelBuilder::initializeInstance() {
545
546
547    if (LLVM_UNLIKELY(getInstance() == nullptr)) {
548        report_fatal_error("Cannot initialize " + getName() + " before calling createInstance()");
549    }
550    std::vector<Value *> args;
551    args.reserve(1 + mInitialArguments.size() + mStreamSetInputBuffers.size() + (mStreamSetOutputBuffers.size() * 2));
552    args.push_back(getInstance());
553    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
554        Value * arg = mInitialArguments[i];
555        if (LLVM_UNLIKELY(arg == nullptr)) {
556            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
557                               + " cannot be null when calling createInstance()");
558        }
559        args.push_back(arg);
560    }
561    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
562        assert (mStreamSetInputBuffers[i]);
563        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
564        if (LLVM_UNLIKELY(arg == nullptr)) {
565            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
566                               + " was not allocated prior to calling createInstance()");
567        }
568        args.push_back(arg);
569    }
570    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
571    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
572        assert (mStreamSetOutputBuffers[i]);
573        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
574        if (LLVM_UNLIKELY(arg == nullptr)) {
575            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
576                               + " was not allocated prior to calling createInstance()");
577        }
578        args.push_back(arg);
579    }
580    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
581
582    IntegerType * const sizeTy = iBuilder->getSizeTy();
583    PointerType * const sizePtrTy = sizeTy->getPointerTo();
584    PointerType * const sizePtrPtrTy = sizePtrTy->getPointerTo();
585    StructType * const consumerTy = StructType::get(sizeTy, sizePtrPtrTy, nullptr);
586    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
587        const auto output = mStreamSetOutputBuffers[i];
588        const auto & consumers = output->getConsumers();
589        const auto n = consumers.size();
590        AllocaInst * const outputConsumers = iBuilder->CreateAlloca(consumerTy);
591        Value * const consumerSegNoArray = iBuilder->CreateAlloca(ArrayType::get(sizePtrTy, n));
592        for (unsigned i = 0; i < n; ++i) {
593            KernelBuilder * const consumer = consumers[i];
594            assert ("all instances must be created prior to initialization of any instance" && consumer->getInstance());
595            Value * const segmentNoPtr = consumer->getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR);
596            iBuilder->CreateStore(segmentNoPtr, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
597        }
598        Value * const consumerCountPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
599        iBuilder->CreateStore(iBuilder->getSize(n), consumerCountPtr);
600        Value * const consumerSegNoArrayPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
601        iBuilder->CreateStore(iBuilder->CreatePointerCast(consumerSegNoArray, sizePtrPtrTy), consumerSegNoArrayPtr);
602        args.push_back(outputConsumers);
603    }
604
605
606    iBuilder->CreateCall(getInitFunction(), args);
607}
608
609//  The default doSegment method dispatches to the doBlock routine for
610//  each block of the given number of blocksToDo, and then updates counts.
611
612void BlockOrientedKernel::generateDoSegmentMethod() {
613
614    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
615    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
616    mStrideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
617    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
618    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
619    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
620
621    Value * baseTarget = nullptr;
622    if (useIndirectBr()) {
623        baseTarget = iBuilder->CreateSelect(mIsFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
624    }
625
626    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
627    Value * availablePos = mAvailableItemCount[0];
628    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
629    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
630    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
631
632    iBuilder->CreateBr(strideLoopCond);
633
634    iBuilder->SetInsertPoint(strideLoopCond);
635
636    PHINode * branchTarget = nullptr;
637    if (useIndirectBr()) {
638        branchTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "branchTarget");
639        branchTarget->addIncoming(baseTarget, entryBlock);
640    }
641
642    PHINode * const stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
643    stridesRemaining->addIncoming(stridesToDo, entryBlock);
644    // NOTE: stridesRemaining may go to a negative number in the final block if the generateFinalBlockMethod(...)
645    // calls CreateDoBlockMethodCall(). Do *not* replace the comparator with an unsigned one!
646    Value * notDone = iBuilder->CreateICmpSGT(stridesRemaining, iBuilder->getSize(0));
647    iBuilder->CreateLikelyCondBr(notDone, mStrideLoopBody, stridesDone);
648
649    iBuilder->SetInsertPoint(mStrideLoopBody);
650
651    if (useIndirectBr()) {
652        mStrideLoopTarget = iBuilder->CreatePHI(baseTarget->getType(), 2, "strideTarget");
653        mStrideLoopTarget->addIncoming(branchTarget, strideLoopCond);
654    }
655
656    /// GENERATE DO BLOCK METHOD
657
658    writeDoBlockMethod();
659
660    /// UPDATE PROCESSED COUNTS
661
662    processed = getProcessedItemCount(mStreamSetInputs[0].name);
663    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
664    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
665
666    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
667
668    BasicBlock * bodyEnd = iBuilder->GetInsertBlock();
669    if (useIndirectBr()) {
670        branchTarget->addIncoming(mStrideLoopTarget, bodyEnd);
671    }
672    iBuilder->CreateBr(strideLoopCond);
673
674    stridesDone->moveAfter(bodyEnd);
675
676    iBuilder->SetInsertPoint(stridesDone);
677
678    // Now conditionally perform the final block processing depending on the doFinal parameter.
679    if (useIndirectBr()) {
680        mStrideLoopBranch = iBuilder->CreateIndirectBr(branchTarget, 3);
681        mStrideLoopBranch->addDestination(doFinalBlock);
682        mStrideLoopBranch->addDestination(segmentDone);
683    } else {
684        iBuilder->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, segmentDone);
685    }
686
687    doFinalBlock->moveAfter(stridesDone);
688
689    iBuilder->SetInsertPoint(doFinalBlock);
690
691    Value * remainingItems = iBuilder->CreateSub(mAvailableItemCount[0], getProcessedItemCount(mStreamSetInputs[0].name));
692    writeFinalBlockMethod(remainingItems);
693
694    itemsDone = mAvailableItemCount[0];
695    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
696    setTerminationSignal();
697    iBuilder->CreateBr(segmentDone);
698
699    segmentDone->moveAfter(iBuilder->GetInsertBlock());
700
701    iBuilder->SetInsertPoint(segmentDone);
702
703    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
704    if (useIndirectBr()) {
705        MDBuilder mdb(iBuilder->getContext());
706        const auto destinations = mStrideLoopBranch->getNumDestinations();
707        uint32_t weights[destinations];
708        for (unsigned i = 0; i < destinations; ++i) {
709            weights[i] = (mStrideLoopBranch->getDestination(i) == segmentDone) ? 100 : 1;
710        }
711        ArrayRef<uint32_t> bw(weights, destinations);
712        mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
713    }
714
715}
716
717inline void BlockOrientedKernel::writeDoBlockMethod() {
718
719    Value * const self = getInstance();
720    Function * const cp = mCurrentMethod;
721    auto ip = iBuilder->saveIP();
722
723    /// Check if the do block method is called and create the function if necessary   
724    if (!useIndirectBr()) {
725        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType()}, false);
726        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
727        mCurrentMethod->setCallingConv(CallingConv::C);
728        mCurrentMethod->setDoesNotThrow();
729        mCurrentMethod->setDoesNotCapture(1);
730        auto args = mCurrentMethod->arg_begin();
731        mCurrentMethod = mCurrentMethod;
732        args->setName("self");
733        setInstance(&*args);
734        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
735    }
736
737    std::vector<Value *> priorProduced;
738    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
739        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]) || isa<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
740            priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
741        }
742    }
743
744    generateDoBlockMethod(); // must be implemented by the BlockOrientedKernelBuilder subtype
745
746    unsigned priorIdx = 0;
747    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
748        Value * log2BlockSize = iBuilder->getSize(std::log2(iBuilder->getBitBlockWidth()));
749        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
750            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
751            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
752            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
753            Value * priorBlock = iBuilder->CreateLShr(priorProduced[priorIdx], log2BlockSize);
754            Value * priorOffset = iBuilder->CreateAnd(priorProduced[priorIdx], iBuilder->getSize(iBuilder->getBitBlockWidth() - 1));
755            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
756            Value * accessibleBlocks = cb->getLinearlyAccessibleBlocks(instance, priorBlock);
757            Value * accessible = iBuilder->CreateSub(iBuilder->CreateShl(accessibleBlocks, log2BlockSize), priorOffset);
758            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
759            iBuilder->CreateCondBr(wraparound, copyBack, done);
760            iBuilder->SetInsertPoint(copyBack);
761            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
762            cb->createCopyBack(instance, copyItems);
763            iBuilder->CreateBr(done);
764            iBuilder->SetInsertPoint(done);
765            priorIdx++;
766        }
767        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
768            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
769            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
770            Value * instance = getStreamSetBufferPtr(mStreamSetOutputs[i].name);
771            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
772            Value * accessible = cb->getLinearlyAccessibleItems(instance, priorProduced[priorIdx]);
773            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
774            iBuilder->CreateCondBr(wraparound, copyBack, done);
775            iBuilder->SetInsertPoint(copyBack);
776            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
777            cb->createCopyBack(instance, copyItems);
778            iBuilder->CreateBr(done);
779            iBuilder->SetInsertPoint(done);
780            priorIdx++;
781        }
782    }
783
784    /// Call the do block method if necessary then restore the current function state to the do segement method
785    if (!useIndirectBr()) {
786        iBuilder->CreateRetVoid();
787        mDoBlockMethod = mCurrentMethod;
788        iBuilder->restoreIP(ip);
789        iBuilder->CreateCall(mCurrentMethod, self);
790        setInstance(self);
791        mCurrentMethod = cp;
792    }
793
794}
795
796inline void BlockOrientedKernel::writeFinalBlockMethod(Value * remainingItems) {
797
798    Value * const self = getInstance();
799    Function * const cp = mCurrentMethod;
800    Value * const remainingItemCount = remainingItems;
801    auto ip = iBuilder->saveIP();
802
803    if (!useIndirectBr()) {
804        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {self->getType(), iBuilder->getSizeTy()}, false);
805        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
806        mCurrentMethod->setCallingConv(CallingConv::C);
807        mCurrentMethod->setDoesNotThrow();
808        mCurrentMethod->setDoesNotCapture(1);
809        auto args = mCurrentMethod->arg_begin();
810        args->setName("self");
811        setInstance(&*args);
812        remainingItems = &*(++args);
813        remainingItems->setName("remainingItems");
814        iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
815    }
816
817    generateFinalBlockMethod(remainingItems); // may be implemented by the BlockOrientedKernel subtype
818
819    RecursivelyDeleteTriviallyDeadInstructions(remainingItems); // if remainingItems was not used, this will eliminate it.
820
821    if (!useIndirectBr()) {
822        iBuilder->CreateRetVoid();       
823        iBuilder->restoreIP(ip);
824        iBuilder->CreateCall(mCurrentMethod, {self, remainingItemCount});
825        mCurrentMethod = cp;
826        setInstance(self);
827    }
828
829}
830
831//  The default finalBlock method simply dispatches to the doBlock routine.
832void BlockOrientedKernel::generateFinalBlockMethod(Value * /* remainingItems */) {
833    CreateDoBlockMethodCall();
834}
835
836void BlockOrientedKernel::CreateDoBlockMethodCall() {
837    if (useIndirectBr()) {
838        BasicBlock * bb = CreateBasicBlock("resume");
839        mStrideLoopBranch->addDestination(bb);
840        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), iBuilder->GetInsertBlock());
841        iBuilder->CreateBr(mStrideLoopBody);
842        bb->moveAfter(iBuilder->GetInsertBlock());
843        iBuilder->SetInsertPoint(bb);
844    } else {
845        iBuilder->CreateCall(mDoBlockMethod, getInstance());
846    }
847}
848
849void KernelBuilder::finalizeInstance() {
850    mOutputScalarResult = iBuilder->CreateCall(getTerminateFunction(), { getInstance() });
851}
852
853KernelBuilder::StreamPort KernelBuilder::getStreamPort(const std::string & name) const {
854    const auto f = mStreamMap.find(name);
855    if (LLVM_UNLIKELY(f == mStreamMap.end())) {
856        report_fatal_error(getName() + " does not contain stream set " + name);
857    }
858    return f->second;
859}
860
861// CONSTRUCTOR
862KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
863                             std::string && kernelName,
864                             std::vector<Binding> && stream_inputs,
865                             std::vector<Binding> && stream_outputs,
866                             std::vector<Binding> && scalar_parameters,
867                             std::vector<Binding> && scalar_outputs,
868                             std::vector<Binding> && internal_scalars)
869: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
870, mModule(nullptr)
871, mCurrentMethod(nullptr)
872, mNoTerminateAttribute(false)
873, mIsGenerated(false)
874, mIsFinal(nullptr)
875, mOutputScalarResult(nullptr) {
876
877}
878
879KernelBuilder::~KernelBuilder() {
880
881}
882
883// CONSTRUCTOR
884BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
885                                         std::string && kernelName,
886                                         std::vector<Binding> && stream_inputs,
887                                         std::vector<Binding> && stream_outputs,
888                                         std::vector<Binding> && scalar_parameters,
889                                         std::vector<Binding> && scalar_outputs,
890                                         std::vector<Binding> && internal_scalars)
891: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
892, mDoBlockMethod(nullptr)
893, mStrideLoopBody(nullptr)
894, mStrideLoopBranch(nullptr)
895, mStrideLoopTarget(nullptr) {
896
897}
898
899// CONSTRUCTOR
900SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
901                                             std::string && kernelName,
902                                             std::vector<Binding> && stream_inputs,
903                                             std::vector<Binding> && stream_outputs,
904                                             std::vector<Binding> && scalar_parameters,
905                                             std::vector<Binding> && scalar_outputs,
906                                             std::vector<Binding> && internal_scalars)
907: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
908
909}
910
Note: See TracBrowser for help on using the repository browser.