source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5335

Last change on this file since 5335 was 5330, checked in by cameron, 3 years ago

Make circular copyback testing available for all block-oriented kernels

File size: 29.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
#include "kernel.h"
#include <toolchain.h>
#include <kernels/streamset.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Module.h>
#include <llvm/Support/MathExtras.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Transforms/Scalar.h>
#ifndef NDEBUG
#include <llvm/IR/Verifier.h>
#endif
19
// Suffixes appended to a kernel name to form the names of its generated
// per-block functions.
static const char * const DO_BLOCK_SUFFIX = "_DoBlock";
static const char * const FINAL_BLOCK_SUFFIX = "_FinalBlock";

// Name of the kernel state scalar holding the logical segment number.
static const char * const LOGICAL_SEGMENT_NO_SCALAR = "logicalSegNo";

// Suffixes appended to a stream set name to form the names of the scalars
// that track how many items were consumed from / written to that stream set.
static const char * const PROCESSED_ITEM_COUNT_SUFFIX = "_processedItemCount";
static const char * const PRODUCED_ITEM_COUNT_SUFFIX = "_producedItemCount";

// Name of the kernel state scalar holding the termination flag.
static const char * const TERMINATION_SIGNAL = "terminationSignal";

// Suffix appended to a stream set name to form the name of the scalar that
// stores the pointer to its buffer.
static const char * const BUFFER_PTR_SUFFIX = "_bufferPtr";

static const char * const BLOCK_MASK_SUFFIX = "_blkMask";
35
36using namespace llvm;
37using namespace kernel;
38using namespace parabix;
39using namespace llvm::legacy;
40
41unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
42    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
43        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
44    }
45    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
46        report_fatal_error(getName() + " already contains scalar field " + name);
47    }
48    const auto index = mKernelFields.size();
49    mKernelMap.emplace(name, index);
50    mKernelFields.push_back(type);
51    return index;
52}
53
54unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
55    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
56        report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
57    }
58    const auto index = mKernelFields.size();
59    mKernelFields.push_back(type);
60    return index;
61}
62
63void KernelBuilder::prepareKernelSignature() {
64    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
65        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, i);
66    }
67    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
68        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, i);
69    }
70}
71   
72void KernelBuilder::prepareKernel() {
73    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
74        report_fatal_error("Cannot prepare kernel after kernel state finalized");
75    }
76    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
77        std::string tmp;
78        raw_string_ostream out(tmp);
79        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
80            << mStreamSetInputs.size() << " input stream sets.";
81        report_fatal_error(out.str());
82    }
83    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
84        std::string tmp;
85        raw_string_ostream out(tmp);
86        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
87            << mStreamSetOutputs.size() << " output stream sets.";
88        report_fatal_error(out.str());
89    }
90    const auto blockSize = iBuilder->getBitBlockWidth();
91    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
92        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
93            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
94        }
95        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
96        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
97            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
98        }
99       
100    }
101    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
102        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
103        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
104            addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
105        }
106    }
107    for (const auto binding : mScalarInputs) {
108        addScalar(binding.type, binding.name);
109    }
110    for (const auto binding : mScalarOutputs) {
111        addScalar(binding.type, binding.name);
112    }
113    if (mStreamSetNameMap.empty()) {
114        prepareKernelSignature();
115    }
116    for (auto binding : mInternalScalars) {
117        addScalar(binding.type, binding.name);
118    }
119    addScalar(iBuilder->getSizeTy(), LOGICAL_SEGMENT_NO_SCALAR);
120    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
121    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
122}
123
124std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
125    auto saveModule = iBuilder->getModule();
126    auto savePoint = iBuilder->saveIP();
127    auto module = make_unique<Module>(getName() + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
128    iBuilder->setModule(module.get());
129    generateKernel(inputs, outputs);
130    iBuilder->setModule(saveModule);
131    iBuilder->restoreIP(savePoint);
132    return module;
133}
134
135void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
136
137    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
138    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
139        if (LLVM_UNLIKELY(mStreamSetInputBuffers[i] == nullptr)) {
140            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
141                               + " cannot be null when calling generateKernel()");
142        }
143    }
144    if (LLVM_UNLIKELY(mStreamSetInputs.size() != mStreamSetInputBuffers.size())) {
145        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
146                           " input stream sets but generateKernel() was given "
147                           + std::to_string(mStreamSetInputBuffers.size()));
148    }
149
150    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
151    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
152        if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i] == nullptr)) {
153            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
154                               + " cannot be null when calling generateKernel()");
155        }
156    }
157    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != mStreamSetOutputBuffers.size())) {
158        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
159                           + " output stream sets but generateKernel() was given "
160                           + std::to_string(mStreamSetOutputBuffers.size()));
161    }
162
163
164    auto savePoint = iBuilder->saveIP();
165    prepareKernel(); // possibly overridden by the KernelBuilder subtype
166    addKernelDeclarations(iBuilder->getModule());
167    callGenerateInitMethod();
168    generateInternalMethods();
169    callGenerateDoSegmentMethod();
170    // Implement the accumulator get functions
171    for (auto binding : mScalarOutputs) {
172        Function * f = getAccumulatorFunction(binding.name);
173        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
174        Value * self = &*(f->arg_begin());
175        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
176        Value * retVal = iBuilder->CreateLoad(ptr);
177        iBuilder->CreateRet(retVal);
178    }
179    iBuilder->restoreIP(savePoint);
180}
181
182void KernelBuilder::callGenerateDoSegmentMethod() {
183    mCurrentFunction = getDoSegmentFunction();
184    iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
185    auto args = mCurrentFunction->arg_begin();
186    mSelf = &*(args++);
187    Value * doFinal = &*(args++);
188    std::vector<Value *> producerPos;
189    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
190        producerPos.push_back(&*(args++));
191    }
192    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
193    iBuilder->CreateRetVoid();
194}
195
196void KernelBuilder::callGenerateInitMethod() {
197    mCurrentFunction = getInitFunction();
198    iBuilder->SetInsertPoint(CreateBasicBlock("Init_entry"));
199    Function::arg_iterator args = mCurrentFunction->arg_begin();
200    mSelf = &*(args++);
201    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), mSelf);
202    for (auto binding : mScalarInputs) {
203        Value * param = &*(args++);
204        Value * ptr = iBuilder->CreateGEP(mSelf, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
205        iBuilder->CreateStore(param, ptr);
206    }
207    generateInitMethod();
208    iBuilder->CreateRetVoid();
209}
210
211ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
212    const auto f = mKernelMap.find(name);
213    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
214        report_fatal_error(getName() + " does not contain scalar: " + name);
215    }
216    return iBuilder->getInt32(f->second);
217}
218
219unsigned KernelBuilder::getScalarCount() const {
220    return mKernelFields.size();
221}
222
223Value * KernelBuilder::getScalarFieldPtr(Value * instance, Value * index) const {
224    return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), index});
225}
226
227Value * KernelBuilder::getScalarFieldPtr(Value * instance, const std::string & fieldName) const {
228    return getScalarFieldPtr(instance, getScalarIndex(fieldName));
229}
230
231Value * KernelBuilder::getScalarField(Value * instance, const std::string & fieldName) const {
232    return iBuilder->CreateLoad(getScalarFieldPtr(instance, fieldName));
233}
234
235Value * KernelBuilder::getScalarField(Value * instance, Value * index) const {
236    return iBuilder->CreateLoad(getScalarFieldPtr(instance, index));
237}
238
239void KernelBuilder::setScalarField(Value * instance, const std::string & fieldName, Value * value) const {
240    iBuilder->CreateStore(value, getScalarFieldPtr(instance, fieldName));
241}
242
243void KernelBuilder::setScalarField(Value * instance, Value * index, Value * value) const {
244    iBuilder->CreateStore(value, getScalarFieldPtr(instance, index));
245}
246
247Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
248    unsigned ssIdx = getStreamSetIndex(name);
249    if (mStreamSetInputs[ssIdx].rate.isExact()) {
250        Value * principalItemsProcessed = getScalarField(instance, mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX);
251        return mStreamSetInputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
252    }
253    return getScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX);
254}
255
256Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name) const {
257    unsigned ssIdx = getStreamSetIndex(name);
258    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
259        std::string principalField = mStreamSetInputs.empty() ? mStreamSetOutputs[0].name + PRODUCED_ITEM_COUNT_SUFFIX : mStreamSetInputs[0].name + PROCESSED_ITEM_COUNT_SUFFIX;
260        Value * principalItemsProcessed = getScalarField(instance, principalField);
261        return mStreamSetOutputs[ssIdx].rate.CreateRatioCalculation(iBuilder, principalItemsProcessed);
262    }
263    return getScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX);
264}
265
266void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
267    //iBuilder->CallPrintInt(getName() + " " + name + " processed", value);
268    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
269}
270
271void KernelBuilder::setProducedItemCount(Value * instance, const std::string & name, Value * value) const {
272    //iBuilder->CallPrintInt(getName() + " " + name +  " produced", value);
273    setScalarField(instance, name + PRODUCED_ITEM_COUNT_SUFFIX, value);
274}
275
276Value * KernelBuilder::getTerminationSignal(Value * instance) const {
277    return getScalarField(instance, TERMINATION_SIGNAL);
278}
279
280void KernelBuilder::setTerminationSignal(Value * instance) const {
281    setScalarField(instance, TERMINATION_SIGNAL, iBuilder->getInt1(true));
282}
283
284LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * instance) const {
285    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
286}
287
288void KernelBuilder::releaseLogicalSegmentNo(Value * instance, Value * newCount) const {
289    iBuilder->CreateAtomicStoreRelease(newCount, getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
290}
291
292inline Value * KernelBuilder::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
293    for (const Binding & b : bindings) {
294        if (b.name == name) {
295            const auto divisor = iBuilder->getBitBlockWidth();
296            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
297                return iBuilder->CreateLShr(itemCount, std::log2(divisor));
298            } else {
299                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
300            }
301        }
302    }
303    report_fatal_error("Error: no binding in " + getName() + " for " + name);
304}
305
306
307
308Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
309    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
310    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
311    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex);
312}
313
314Value * KernelBuilder::loadInputStreamBlock(const std::string & name, Value * streamIndex) const {
315    return iBuilder->CreateBlockAlignedLoad(getInputStreamBlockPtr(name, streamIndex));
316}
317
318Value * KernelBuilder::getInputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
319    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
320    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
321    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex);
322}
323
324Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) const {
325    return iBuilder->CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
326}
327
328llvm::Value * KernelBuilder::getInputStreamSetCount(const std::string & name) const {
329    return getInputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
330}
331
332llvm::Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const {
333    Value * blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
334    blockIndex = iBuilder->CreateAdd(blockIndex, blockAdjustment);
335    const StreamSetBuffer * const buf = getInputStreamSetBuffer(name);
336    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex);
337}
338
339Value * KernelBuilder::getOutputStreamBlockPtr(const std::string & name, Value * streamIndex) const {
340    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
341    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
342    return buf->getStreamBlockPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex);
343}
344
345void KernelBuilder::storeOutputStreamBlock(const std::string & name, Value * streamIndex, Value * toStore) const {
346    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamBlockPtr(name, streamIndex));
347}
348
349Value * KernelBuilder::getOutputStreamPackPtr(const std::string & name, Value * streamIndex, Value * packIndex) const {
350    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
351    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
352    return buf->getStreamPackPtr(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex);
353}
354
355void KernelBuilder::storeOutputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex, Value * toStore) const {
356    return iBuilder->CreateBlockAlignedStore(toStore, getOutputStreamPackPtr(name, streamIndex, packIndex));
357}
358
359llvm::Value * KernelBuilder::getOutputStreamSetCount(const std::string & name) const {
360    return getOutputStreamSetBuffer(name)->getStreamSetCount(getStreamSetBufferPtr(name));
361}
362
363Value * KernelBuilder::getRawInputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
364    return getInputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
365}
366
367Value * KernelBuilder::getRawOutputPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
368    return getOutputStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
369}
370
371unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
372    const auto f = mStreamSetNameMap.find(name);
373    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
374        report_fatal_error(getName() + " does not contain stream set: " + name);
375    }
376    return f->second;
377}
378
379Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
380    return getScalarField(getSelf(), name + BUFFER_PTR_SUFFIX);
381}
382
383Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
384    for (auto & arg : f->getArgumentList()) {
385        if (arg.getName().equals(name)) {
386            return &arg;
387        }
388    }
389    report_fatal_error(getName() + " does not have parameter " + name);
390}
391
392Value * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
393    return iBuilder->CreateCall(getDoSegmentFunction(), args);
394}
395
396Value * KernelBuilder::createGetAccumulatorCall(Value * self, const std::string & accumName) const {
397    return iBuilder->CreateCall(getAccumulatorFunction(accumName), {self});
398}
399
400BasicBlock * KernelBuilder::CreateBasicBlock(std::string && name) const {
401    return BasicBlock::Create(iBuilder->getContext(), name, mCurrentFunction);
402}
403
404void KernelBuilder::createInstance() {
405    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
406        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
407    }
408    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
409
410    std::vector<Value *> args;
411    args.reserve(mInitialArguments.size() + mStreamSetInputBuffers.size() + mStreamSetOutputBuffers.size() + 1);
412    args.push_back(mKernelInstance);
413    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
414        Value * arg = mInitialArguments[i];
415        if (LLVM_UNLIKELY(arg == nullptr)) {
416            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
417                               + " cannot be null when calling createInstance()");
418        }
419        args.push_back(arg);
420    }
421    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
422        assert (mStreamSetInputBuffers[i]);
423        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
424        if (LLVM_UNLIKELY(arg == nullptr)) {
425            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
426                               + " was not allocated prior to calling createInstance()");
427        }
428        args.push_back(arg);
429    }
430    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
431    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
432        assert (mStreamSetOutputBuffers[i]);
433        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
434        if (LLVM_UNLIKELY(arg == nullptr)) {
435            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
436                               + " was not allocated prior to calling createInstance()");
437        }
438        args.push_back(arg);
439    }
440    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
441    iBuilder->CreateCall(getInitFunction(), args);
442}
443
444//  The default finalBlock method simply dispatches to the doBlock routine.
445void BlockOrientedKernel::generateFinalBlockMethod(Value * remainingBytes) {
446//    std::vector<Value *> args = {self};
447//    for (Argument & arg : function->getArgumentList()){
448//        args.push_back(&arg);
449//    }
450    CreateDoBlockMethodCall();
451}
452
453//Value * BlockOrientedKernel::loadBlock(const std::string & inputName, Value * const streamIndex) const {
454
455//}
456
457//Value * BlockOrientedKernel::loadPack(const std::string & inputName, Value * const streamIndex, Value * const packIndex) const {
458
459//}
460
461
462//  The default doSegment method dispatches to the doBlock routine for
463//  each block of the given number of blocksToDo, and then updates counts.
464void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
465
466    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
467    BasicBlock * const strideLoopCond = CreateBasicBlock(getName() + "_strideLoopCond");
468    BasicBlock * const strideLoopBody = CreateBasicBlock(getName() + "_strideLoopBody");
469    BasicBlock * const stridesDone = CreateBasicBlock(getName() + "_stridesDone");
470    BasicBlock * const doFinalBlock = CreateBasicBlock(getName() + "_doFinalBlock");
471    BasicBlock * const segmentDone = CreateBasicBlock(getName() + "_segmentDone");
472
473    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
474
475    Value * availablePos = producerPos[0];
476    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
477        Value * p = producerPos[i];
478        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
479    }
480
481    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
482    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
483    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
484    iBuilder->CreateBr(strideLoopCond);
485
486    iBuilder->SetInsertPoint(strideLoopCond);
487    PHINode * stridesRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "stridesRemaining");
488    stridesRemaining->addIncoming(stridesToDo, entryBlock);
489    Value * notDone = iBuilder->CreateICmpNE(stridesRemaining, iBuilder->getSize(0));
490    iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
491
492    iBuilder->SetInsertPoint(strideLoopBody);
493
494    CreateDoBlockMethodCall();
495
496    processed = getProcessedItemCount(mStreamSetInputs[0].name);
497    Value * itemsDone = iBuilder->CreateAdd(processed, stride);
498    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
499   
500    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, iBuilder->getSize(1)), strideLoopBody);
501    iBuilder->CreateBr(strideLoopCond);
502
503    iBuilder->SetInsertPoint(stridesDone);
504
505    // Now conditionally perform the final block processing depending on the doFinal parameter.
506    iBuilder->CreateCondBr(doFinal, doFinalBlock, segmentDone);
507    iBuilder->SetInsertPoint(doFinalBlock);
508
509    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
510
511    CreateDoFinalBlockMethodCall(remainingItems);
512   
513    itemsDone = producerPos[0];
514    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);   
515   
516    setTerminationSignal();
517    iBuilder->CreateBr(segmentDone);
518
519    iBuilder->SetInsertPoint(segmentDone);
520
521}
522
523void BlockOrientedKernel::generateInternalMethods() {
524
525    callGenerateDoBlockMethod();
526
527    callGenerateDoFinalBlockMethod();
528}
529
530void BlockOrientedKernel::callGenerateDoBlockMethod() {
531    mCurrentFunction = getDoBlockFunction();
532    auto args = mCurrentFunction->arg_begin();
533    mSelf = &(*args);
534    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
535    std::vector<Value *> priorProduced;
536    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
537        if (isa<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
538            priorProduced.push_back(getProducedItemCount(mStreamSetOutputs[i].name));
539        }
540    }
541    generateDoBlockMethod(); // must be implemented by the KernelBuilder subtype
542    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
543        unsigned priorIdx = 0;
544        if (auto cb = dyn_cast<CircularCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
545            BasicBlock * copyBack = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
546            BasicBlock * done = CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
547            Value * newlyProduced = iBuilder->CreateSub(getProducedItemCount(mStreamSetOutputs[i].name), priorProduced[priorIdx]);
548            Value * accessible = cb->getLinearlyAccessibleItems(priorProduced[priorIdx]);
549            Value * wraparound = iBuilder->CreateICmpULT(accessible, newlyProduced);
550            iBuilder->CreateCondBr(wraparound, copyBack, done);
551            iBuilder->SetInsertPoint(copyBack);
552            Value * copyItems = iBuilder->CreateSub(newlyProduced, accessible);
553            cb->createCopyBack(getStreamSetBufferPtr(mStreamSetOutputs[i].name), copyItems);
554            iBuilder->CreateBr(done);
555            iBuilder->SetInsertPoint(done);
556            priorIdx++;
557        }
558    }   
559    iBuilder->CreateRetVoid();
560    #ifndef NDEBUG
561    std::string tmp;
562    raw_string_ostream out(tmp);
563    if (verifyFunction(*mCurrentFunction, &out)) {
564        mCurrentFunction->dump();
565        report_fatal_error(getName() + ": " + out.str());
566    }
567    #endif
568    // Use the pass manager to optimize the function.
569    FunctionPassManager fpm(iBuilder->getModule());
570    fpm.add(createReassociatePass());             //Reassociate expressions.
571    fpm.add(createGVNPass());                     //Eliminate common subexpressions.
572    fpm.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
573    fpm.doInitialization();
574    fpm.run(*mCurrentFunction);
575}
576
577
578void BlockOrientedKernel::callGenerateDoFinalBlockMethod() {
579    mCurrentFunction = getDoFinalBlockFunction();
580    auto args = mCurrentFunction->arg_begin();
581    mSelf = &(*args++);
582    Value * const remainingBytes = &(*args);
583    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
584    generateFinalBlockMethod(remainingBytes); // possibly overridden by the KernelBuilder subtype
585    iBuilder->CreateRetVoid();
586}
587
588Function * BlockOrientedKernel::getDoBlockFunction() const {
589    const auto name = getName() + DO_BLOCK_SUFFIX;
590    Function * const f = iBuilder->getModule()->getFunction(name);
591    if (LLVM_UNLIKELY(f == nullptr)) {
592        report_fatal_error("Cannot find " + name);
593    }
594    return f;
595}
596
597CallInst * BlockOrientedKernel::CreateDoBlockMethodCall() const {
598    return iBuilder->CreateCall(getDoBlockFunction(), mSelf);
599}
600
601Function * BlockOrientedKernel::getDoFinalBlockFunction() const {
602    const auto name = getName() + FINAL_BLOCK_SUFFIX;
603    Function * const f = iBuilder->getModule()->getFunction(name);
604    if (LLVM_UNLIKELY(f == nullptr)) {
605        report_fatal_error("Cannot find " + name);
606    }
607    return f;
608}
609
610CallInst * BlockOrientedKernel::CreateDoFinalBlockMethodCall(Value * remainingItems) const {
611    return iBuilder->CreateCall(getDoFinalBlockFunction(), {mSelf, remainingItems});
612}
613
614void BlockOrientedKernel::addAdditionalKernelDeclarations(Module * m, PointerType * selfType) {
615    // Create the doBlock and finalBlock function prototypes
616    FunctionType * const doBlockType = FunctionType::get(iBuilder->getVoidTy(), {selfType}, false);
617    Function * const doBlock = Function::Create(doBlockType, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, m);
618    doBlock->setCallingConv(CallingConv::C);
619    doBlock->setDoesNotThrow();
620    doBlock->setDoesNotCapture(1);
621    auto args = doBlock->arg_begin();
622    args->setName("self");
623
624    FunctionType * const finalBlockType = FunctionType::get(iBuilder->getVoidTy(), {selfType, iBuilder->getSizeTy()}, false);
625    Function * const finalBlock = Function::Create(finalBlockType, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, m);
626    finalBlock->setCallingConv(CallingConv::C);
627    finalBlock->setDoesNotThrow();
628    finalBlock->setDoesNotCapture(1);
629    args = finalBlock->arg_begin();
630    args->setName("self");
631    (++args)->setName("remainingBytes");
632}
633
634// CONSTRUCTOR
635KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
636                             std::string && kernelName,
637                             std::vector<Binding> && stream_inputs,
638                             std::vector<Binding> && stream_outputs,
639                             std::vector<Binding> && scalar_parameters,
640                             std::vector<Binding> && scalar_outputs,
641                             std::vector<Binding> && internal_scalars)
642: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
643, mNoTerminateAttribute(false)
644, mDoBlockUpdatesProducedItemCountsAttribute(false) {
645
646}
647
648KernelBuilder::~KernelBuilder() { }
649
650// CONSTRUCTOR
651BlockOrientedKernel::BlockOrientedKernel(IDISA::IDISA_Builder * builder,
652                                         std::string && kernelName,
653                                         std::vector<Binding> && stream_inputs,
654                                         std::vector<Binding> && stream_outputs,
655                                         std::vector<Binding> && scalar_parameters,
656                                         std::vector<Binding> && scalar_outputs,
657                                         std::vector<Binding> && internal_scalars)
658: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
659
660}
661
662
663
664
665// CONSTRUCTOR
666SegmentOrientedKernel::SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
667                                             std::string && kernelName,
668                                             std::vector<Binding> && stream_inputs,
669                                             std::vector<Binding> && stream_outputs,
670                                             std::vector<Binding> && scalar_parameters,
671                                             std::vector<Binding> && scalar_outputs,
672                                             std::vector<Binding> && internal_scalars)
673: KernelBuilder(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
674
675}
Note: See TracBrowser for help on using the repository browser.