source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5261

Last change on this file since 5261 was 5261, checked in by cameron, 9 months ago

Move responsibility for ProducedItemCount into doSegment unless overridden

File size: 32.6 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <llvm/IR/Value.h>               // for Value
8#include <llvm/Support/ErrorHandling.h>  // for report_fatal_error
9#include <toolchain.h>                   // for BufferSegments, SegmentSize
10#include "IR_Gen/idisa_builder.h"        // for IDISA_Builder
11#include "kernels/streamset.h"           // for StreamSetBuffer
12#include "llvm/ADT/StringRef.h"          // for StringRef, operator==
13#include "llvm/IR/CallingConv.h"         // for ::C
14#include "llvm/IR/Constant.h"            // for Constant
15#include "llvm/IR/Constants.h"           // for ConstantInt
16#include "llvm/IR/Function.h"            // for Function, Function::arg_iter...
17#include "llvm/IR/Instructions.h"        // for LoadInst (ptr only), PHINode
18#include "llvm/Support/Compiler.h"       // for LLVM_UNLIKELY
19namespace llvm { class BasicBlock; }
20namespace llvm { class Module; }
21namespace llvm { class Type; }
22
23using namespace llvm;
24using namespace kernel;
25using namespace parabix;
26
27KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
28                             std::string kernelName,
29                             std::vector<Binding> stream_inputs,
30                             std::vector<Binding> stream_outputs,
31                             std::vector<Binding> scalar_parameters,
32                             std::vector<Binding> scalar_outputs,
33                             std::vector<Binding> internal_scalars)
34: KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars),
35mNoTerminateAttribute(false),
36mDoBlockUpdatesProducedItemCountsAttribute(false) {
37
38}
39
40unsigned KernelBuilder::addScalar(Type * type, const std::string & name) {
41    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
42        llvm::report_fatal_error("Cannot add kernel field " + name + " after kernel state finalized");
43    }
44    const auto index = mKernelFields.size();
45    mKernelMap.emplace(name, index);
46    mKernelFields.push_back(type);
47    return index;
48}
49
50void KernelBuilder::prepareKernel() {
51    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
52        llvm::report_fatal_error("Cannot prepare kernel after kernel state finalized");
53    }
54    unsigned blockSize = iBuilder->getBitBlockWidth();
55    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
56        std::string tmp;
57        raw_string_ostream out(tmp);
58        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
59            << mStreamSetInputs.size() << " input stream sets.";
60        throw std::runtime_error(out.str());
61    }
62    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
63        std::string tmp;
64        raw_string_ostream out(tmp);
65        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
66            << mStreamSetOutputs.size() << " output stream sets.";
67        throw std::runtime_error(out.str());
68    }
69    int streamSetNo = 0;
70    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
71        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
72             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
73        }
74        mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].name + structPtrSuffix});
75        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, streamSetNo);
76        addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + processedItemCountSuffix);
77        streamSetNo++;
78    }
79    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
80        mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].name + structPtrSuffix});
81        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, streamSetNo);
82        addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + producedItemCountSuffix);
83        streamSetNo++;
84    }
85    for (auto binding : mScalarInputs) {
86        addScalar(binding.type, binding.name);
87    }
88    for (auto binding : mScalarOutputs) {
89        addScalar(binding.type, binding.name);
90    }
91    for (auto binding : mInternalScalars) {
92        addScalar(binding.type, binding.name);
93    }
94    addScalar(iBuilder->getSizeTy(), blockNoScalar);
95    addScalar(iBuilder->getSizeTy(), logicalSegmentNoScalar);
96    addScalar(iBuilder->getInt1Ty(), terminationSignal);
97    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, mKernelName);
98}
99
100std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
101    auto saveModule = iBuilder->getModule();
102    auto savePoint = iBuilder->saveIP();
103    auto module = make_unique<Module>(mKernelName + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
104    iBuilder->setModule(module.get());
105    generateKernel(inputs, outputs);
106    iBuilder->setModule(saveModule);
107    iBuilder->restoreIP(savePoint);
108    return module;
109}
110
111void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
112    auto savePoint = iBuilder->saveIP();
113    Module * const m = iBuilder->getModule();
114    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
115    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
116    prepareKernel();            // possibly overridden by the KernelBuilder subtype
117    addKernelDeclarations(m);
118    generateInitMethod();       // possibly overridden by the KernelBuilder subtype
119    generateDoBlockMethod();    // must be implemented by the KernelBuilder subtype
120    generateFinalBlockMethod(); // possibly overridden by the KernelBuilder subtype
121    generateDoSegmentMethod();
122    generateFinalSegmentMethod();
123
124    // Implement the accumulator get functions
125    for (auto binding : mScalarOutputs) {
126        auto fnName = mKernelName + accumulator_infix + binding.name;
127        Function * accumFn = m->getFunction(fnName);
128        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, accumFn, 0));
129        Value * self = &*(accumFn->arg_begin());
130        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
131        Value * retVal = iBuilder->CreateLoad(ptr);
132        iBuilder->CreateRet(retVal);
133    }
134    generateInitMethod();
135    iBuilder->restoreIP(savePoint);
136}
137
138// Default init method, possibly overridden if special init actions required.
139void KernelBuilder::generateInitMethod() const {
140    auto savePoint = iBuilder->saveIP();
141    Module * const m = iBuilder->getModule();
142    Function * initFunction = m->getFunction(mKernelName + init_suffix);
143    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", initFunction, 0));   
144    Function::arg_iterator args = initFunction->arg_begin();
145    Value * self = &*(args++);
146    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), self);
147    for (auto binding : mScalarInputs) {
148        Value * param = &*(args++);
149        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
150        iBuilder->CreateStore(param, ptr);
151    }
152    iBuilder->CreateRetVoid();
153    iBuilder->restoreIP(savePoint);
154}
155
156//  The default finalBlock method simply dispatches to the doBlock routine.
157void KernelBuilder::generateFinalBlockMethod() const {
158    auto savePoint = iBuilder->saveIP();
159    Module * m = iBuilder->getModule();
160    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
161    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
162    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
163    // Final Block arguments: self, remaining, then the standard DoBlock args.
164    Function::arg_iterator args = finalBlockFunction->arg_begin();
165    Value * self = &*(args++);
166    /* Skip "remaining" arg */ args++;
167    std::vector<Value *> doBlockArgs = {self};
168    while (args != finalBlockFunction->arg_end()){
169        doBlockArgs.push_back(&*args++);
170    }
171    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
172    iBuilder->CreateRetVoid();
173    iBuilder->restoreIP(savePoint);
174}
175
176// Note: this may be overridden to incorporate doBlock logic directly into
177// the doSegment function.
178void KernelBuilder::generateDoBlockLogic(Value * self, Value * /* blockNo */) const {
179    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
180    iBuilder->CreateCall(doBlockFunction, self);
181}
182
183
184//  The default doSegment method dispatches to the doBlock routine for
185//  each block of the given number of blocksToDo, and then updates counts.
186void KernelBuilder::generateDoSegmentMethod() const {
187    auto savePoint = iBuilder->saveIP();
188    Module * m = iBuilder->getModule();
189    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
190    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
191    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
192    BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), "strideLoopCond", doSegmentFunction, 0);
193    BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), "strideLoopBody", doSegmentFunction, 0);
194    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", doSegmentFunction, 0);
195    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0);
196    BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), "finalExit", doSegmentFunction, 0);
197    Type * const size_ty = iBuilder->getSizeTy();
198    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
199    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
200   
201    Function::arg_iterator args = doSegmentFunction->arg_begin();
202    Value * self = &*(args++);
203    Value * blocksToDo = &*(args);
204   
205    std::vector<Value *> inbufProducerPtrs;
206    std::vector<Value *> endSignalPtrs;
207    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
208        Value * param = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
209        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(param));
210        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(param));
211    }
212   
213    std::vector<Value *> producerPos;
214    /* Determine the actually available data examining all input stream sets. */
215    LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[0]);
216    producerPos.push_back(p);
217    Value * availablePos = producerPos[0];
218    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
219        LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
220        producerPos.push_back(p);
221        /* Set the available position to be the minimum of availablePos and producerPos. */
222        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
223    }
224    Value * processed = getProcessedItemCount(self, mStreamSetInputs[0].name);
225    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
226//#ifndef NDEBUG
227//    iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail);
228//#endif
229    Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
230    Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
231    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
232    Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo);
233    stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo);
234    //iBuilder->CallPrintInt(mKernelName + "_stridesAvail", stridesAvail);
235    iBuilder->CreateBr(strideLoopCond);
236
237    iBuilder->SetInsertPoint(strideLoopCond);
238    PHINode * stridesRemaining = iBuilder->CreatePHI(size_ty, 2, "stridesRemaining");
239    stridesRemaining->addIncoming(stridesToDo, entryBlock);
240    Value * notDone = iBuilder->CreateICmpUGT(stridesRemaining, ConstantInt::get(size_ty, 0));
241    iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
242
243    iBuilder->SetInsertPoint(strideLoopBody);
244    Value * blockNo = getScalarField(self, blockNoScalar);   
245
246    generateDoBlockLogic(self, blockNo);
247    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
248    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, ConstantInt::get(size_ty, 1)), strideLoopBody);
249    iBuilder->CreateBr(strideLoopCond);
250   
251    iBuilder->SetInsertPoint(stridesDone);
252   
253    Value * segmentItemsProcessed = iBuilder->CreateMul(stridesToDo, stride);
254    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
255        Value * preProcessed = getProcessedItemCount(self, mStreamSetInputs[i].name);
256        setProcessedItemCount(self, mStreamSetInputs[i].name, iBuilder->CreateAdd(preProcessed, segmentItemsProcessed));
257    }
258    if (!mDoBlockUpdatesProducedItemCountsAttribute) {
259        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
260            Value * preProduced = getProducedItemCount(self, mStreamSetOutputs[i].name);
261            setProducedItemCount(self, mStreamSetOutputs[i].name, iBuilder->CreateAdd(preProduced, segmentItemsProcessed));
262        }
263    }
264    iBuilder->CreateBr(segmentDone);
265    iBuilder->SetInsertPoint(segmentDone);
266//#ifndef NDEBUG
267//    iBuilder->CallPrintInt(mKernelName + "_processed", processed);
268//#endif
269    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
270        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
271        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
272        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
273        iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
274    }
275    iBuilder->CreateBr(finalExit);
276    iBuilder->SetInsertPoint(finalExit);
277
278    iBuilder->CreateRetVoid();
279    iBuilder->restoreIP(savePoint);
280}
281
282void KernelBuilder::generateFinalSegmentMethod() const {
283    auto savePoint = iBuilder->saveIP();
284    Module * m = iBuilder->getModule();
285    Function * finalSegmentFunction = m->getFunction(mKernelName + finalSegment_suffix);
286    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", finalSegmentFunction, 0));
287    BasicBlock * doStrides = BasicBlock::Create(iBuilder->getContext(), "doStrides", finalSegmentFunction, 0);
288    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", finalSegmentFunction, 0);
289    Type * const size_ty = iBuilder->getSizeTy();
290    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
291    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
292    Function::arg_iterator args = finalSegmentFunction->arg_begin();
293    Value * self = &*(args++);
294    Value * blocksToDo = &*(args);
295    std::vector<Value *> inbufProducerPtrs;
296    std::vector<Value *> endSignalPtrs;
297    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
298        Value * param = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
299        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(param));
300        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(param));
301    }
302   
303    std::vector<Value *> producerPos;
304    /* Determine the actually available data examining all input stream sets. */
305    LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[0]);
306    producerPos.push_back(p);
307    Value * availablePos = producerPos[0];
308    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
309        LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
310        producerPos.push_back(p);
311        /* Set the available position to be the minimum of availablePos and producerPos. */
312        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
313    }
314    Value * processed = getProcessedItemCount(self, mStreamSetInputs[0].name);
315    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
316//#ifndef NDEBUG
317//    iBuilder->CallPrintInt(mKernelName + "_itemsAvail final", itemsAvail);
318//#endif
319    Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
320    Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
321    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
322    Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo);
323    stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo);
324    Value * notDone = iBuilder->CreateICmpUGT(stridesToDo, ConstantInt::get(size_ty, 0));
325    iBuilder->CreateCondBr(notDone, doStrides, stridesDone);
326   
327    iBuilder->SetInsertPoint(doStrides);
328    createDoSegmentCall(self, blocksToDo);
329    iBuilder->CreateBr(stridesDone);
330   
331    iBuilder->SetInsertPoint(stridesDone);
332    /* Now at most a partial block remains. */
333   
334    processed = getProcessedItemCount(self, mStreamSetInputs[0].name);   
335    Value * remainingItems = iBuilder->CreateSub(producerPos[0], processed);
336    //iBuilder->CallPrintInt(mKernelName + " remainingItems", remainingItems);
337       
338    createFinalBlockCall(self, remainingItems);
339   
340    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
341        Value * preProcessed = getProcessedItemCount(self, mStreamSetInputs[i].name);
342        setProcessedItemCount(self, mStreamSetInputs[i].name, iBuilder->CreateAdd(preProcessed, remainingItems));
343    }
344    if (!mDoBlockUpdatesProducedItemCountsAttribute) {
345        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
346            Value * preProduced = getProducedItemCount(self, mStreamSetOutputs[i].name);
347            setProducedItemCount(self, mStreamSetOutputs[i].name, iBuilder->CreateAdd(preProduced, remainingItems));
348        }
349    }
350//#ifndef NDEBUG
351//    iBuilder->CallPrintInt(mKernelName + "_processed final", processed);
352//#endif
353    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
354        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
355        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
356        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
357        iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
358    }
359
360    iBuilder->CreateRetVoid();
361
362    iBuilder->restoreIP(savePoint);
363}
364
365
366
367ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
368    const auto f = mKernelMap.find(name);
369    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
370        llvm::report_fatal_error("Kernel does not contain scalar: " + name);
371    }
372    return iBuilder->getInt32(f->second);
373}
374
375unsigned KernelBuilder::getScalarCount() const {
376    return mKernelFields.size();
377}
378
379Value * KernelBuilder::getScalarFieldPtr(Value * self, const std::string & fieldName) const {
380    return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
381}
382
383Value * KernelBuilder::getScalarField(Value * self, const std::string & fieldName) const {
384    return iBuilder->CreateLoad(getScalarFieldPtr(self, fieldName));
385}
386
387void KernelBuilder::setScalarField(Value * self, const std::string & fieldName, Value * newFieldVal) const {
388    iBuilder->CreateStore(newFieldVal, getScalarFieldPtr(self, fieldName));
389}
390
391LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * self) const {
392    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
393    return iBuilder->CreateAtomicLoadAcquire(ptr);
394}
395
396Value * KernelBuilder::getProcessedItemCount(Value * self, const std::string & ssName) const {
397    return getScalarField(self, ssName + processedItemCountSuffix);
398}
399
400Value * KernelBuilder::getProducedItemCount(Value * self, const std::string & ssName) const {
401    return getScalarField(self, ssName + producedItemCountSuffix);
402}
403
404Value * KernelBuilder::getTerminationSignal(Value * self) const {
405    return getScalarField(self, terminationSignal);
406}
407
408void KernelBuilder::releaseLogicalSegmentNo(Value * self, Value * newCount) const {
409    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
410    iBuilder->CreateAtomicStoreRelease(newCount, ptr);
411}
412
413void KernelBuilder::setProcessedItemCount(Value * self, const std::string & ssName, Value * newCount) const {
414    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(ssName + processedItemCountSuffix)});
415    iBuilder->CreateStore(newCount, ptr);
416}
417
418void KernelBuilder::setProducedItemCount(Value * self, const std::string & ssName, Value * newCount) const {
419    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(ssName + producedItemCountSuffix)});
420    iBuilder->CreateStore(newCount, ptr);
421}
422
423void KernelBuilder::setTerminationSignal(Value * self) const {
424    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(terminationSignal)});
425    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), ptr);
426}
427
428Value * KernelBuilder::getBlockNo(Value * self) const {
429    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
430    return iBuilder->CreateLoad(ptr);
431}
432
433void KernelBuilder::setBlockNo(Value * self, Value * value) const {
434    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
435    iBuilder->CreateStore(value, ptr);
436}
437
438
439Value * KernelBuilder::getParameter(Function * f, const std::string & paramName) const {
440    for (Function::arg_iterator argIter = f->arg_begin(), end = f->arg_end(); argIter != end; argIter++) {
441        Value * arg = &*argIter;
442        if (arg->getName() == paramName) return arg;
443    }
444    llvm::report_fatal_error("Method does not have parameter: " + paramName);
445}
446
447unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
448    const auto f = mStreamSetNameMap.find(name);
449    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
450        llvm::report_fatal_error("Kernel does not contain stream set: " + name);
451    }
452    return f->second;
453}
454
455Value * KernelBuilder::getStreamSetStructPtr(Value * self, const std::string & name) const {
456    return getScalarField(self, name + structPtrSuffix);
457}
458
459inline const StreamSetBuffer * KernelBuilder::getStreamSetBuffer(const std::string & name) const {
460    const unsigned structIdx = getStreamSetIndex(name);
461    if (structIdx < mStreamSetInputs.size()) {
462        return mStreamSetInputBuffers[structIdx];
463    } else {
464        return mStreamSetOutputBuffers[structIdx - mStreamSetInputs.size()];
465    }
466}
467
468Value * KernelBuilder::getStreamSetPtr(Value * self, const std::string & name, Value * blockNo) const {
469    return getStreamSetBuffer(name)->getStreamSetPtr(getStreamSetStructPtr(self, name), blockNo);
470}
471
472Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index) const {
473    return getStreamSetBuffer(name)->getStream(getStreamSetStructPtr(self, name), blockNo, index);
474}
475
476Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index1, Value * index2) const {
477    assert (index1->getType() == index2->getType());
478    return getStreamSetBuffer(name)->getStream(getStreamSetStructPtr(self, name), blockNo, index1, index2);
479}
480
481Value * KernelBuilder::getStreamView(Value * self, const std::string & name, Value * blockNo, Value * index) const {
482    return getStreamSetBuffer(name)->getStreamView(getStreamSetStructPtr(self, name), blockNo, index);
483}
484
485Value * KernelBuilder::getStreamView(llvm::Type * type, Value * self, const std::string & name, Value * blockNo, Value * index) const {
486    return getStreamSetBuffer(name)->getStreamView(type, getStreamSetStructPtr(self, name), blockNo, index);
487}
488
489void KernelBuilder::createInstance() {
490    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
491        llvm::report_fatal_error("Cannot create kernel instance before calling prepareKernel()");
492    }
493    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
494    Module * m = iBuilder->getModule();
495    std::vector<Value *> init_args = {mKernelInstance};
496    for (auto a : mInitialArguments) {
497        init_args.push_back(a);
498    }
499    for (auto b : mStreamSetInputBuffers) {
500        init_args.push_back(b->getStreamSetStructPtr());
501    }
502    for (auto b : mStreamSetOutputBuffers) {
503        init_args.push_back(b->getStreamSetStructPtr());
504    }
505    std::string initFnName = mKernelName + init_suffix;
506    Function * initMethod = m->getFunction(initFnName);
507    if (!initMethod) {
508        llvm::report_fatal_error("Cannot find " + initFnName);
509    }
510    iBuilder->CreateCall(initMethod, init_args);
511}
512
513Function * KernelBuilder::generateThreadFunction(const std::string & name) const {
514    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
515        llvm::report_fatal_error("Cannot generate thread function before calling prepareKernel()");
516    }
517    Module * m = iBuilder->getModule();
518    Type * const voidTy = iBuilder->getVoidTy();
519    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
520    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
521    Type * const int1ty = iBuilder->getInt1Ty();
522
523    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
524    threadFunc->setCallingConv(CallingConv::C);
525    Function::arg_iterator args = threadFunc->arg_begin();
526
527    Value * const arg = &*(args++);
528    arg->setName("args");
529
530    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
531
532    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
533
534    std::vector<Value *> inbufProducerPtrs;
535    std::vector<Value *> inbufConsumerPtrs;
536    std::vector<Value *> outbufProducerPtrs;
537    std::vector<Value *> outbufConsumerPtrs;   
538    std::vector<Value *> endSignalPtrs;
539
540    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
541        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
542        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
543        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(ssStructPtr));
544        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
545    }
546    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
547        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
548        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr));
549        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
550    }
551
552    const unsigned segmentBlocks = codegen::SegmentSize;
553    const unsigned bufferSegments = codegen::BufferSegments;
554    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
555    Type * const size_ty = iBuilder->getSizeTy();
556
557    Value * segSize = ConstantInt::get(size_ty, segmentSize);
558    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
559    Value * segBlocks = ConstantInt::get(size_ty, segmentBlocks);
560   
561    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
562    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
563   
564    BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
565    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
566    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
567    BasicBlock * doFinalSegBlock = BasicBlock::Create(iBuilder->getContext(), "doFinalSeg", threadFunc, 0);
568    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), "doFinal", threadFunc, 0);
569
570    iBuilder->CreateBr(outputCheckBlock);
571
572    iBuilder->SetInsertPoint(outputCheckBlock);
573
574    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
575    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
576        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(outbufProducerPtrs[i]);
577        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
578        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(outbufConsumerPtrs[i]);
579        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
580        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
581    }
582   
583    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock); 
584
585    iBuilder->SetInsertPoint(inputCheckBlock); 
586
587    Value * requiredSize = segSize;
588    if (mLookAheadPositions > 0) {
589        requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
590    }
591    waitCondTest = ConstantInt::get(int1ty, 1); 
592    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
593        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
594        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
595        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(inbufConsumerPtrs[i]);
596        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
597        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
598    }
599
600    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
601   
602    iBuilder->SetInsertPoint(endSignalCheckBlock);
603   
604    LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]);
605    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
606        LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]);
607        iBuilder->CreateAnd(endSignal, endSignal_next);
608    }
609       
610    iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
611   
612    iBuilder->SetInsertPoint(doSegmentBlock);
613 
614    createDoSegmentCall(self, segBlocks);
615
616    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
617        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
618        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
619    }
620   
621    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
622        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
623        iBuilder->CreateAtomicStoreRelease(produced, outbufProducerPtrs[i]);
624    }
625   
626    Value * earlyEndSignal = getTerminationSignal(self);
627    if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
628        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
629        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
630
631        iBuilder->SetInsertPoint(earlyEndBlock);
632        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
633            Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
634            mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
635        }       
636    }
637    iBuilder->CreateBr(outputCheckBlock);
638     
639    iBuilder->SetInsertPoint(endBlock);
640    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
641    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
642    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
643    Value * blockSize = ConstantInt::get(size_ty, iBuilder->getBitBlockWidth());
644    Value * blocks = iBuilder->CreateUDiv(remainingBytes, blockSize);
645    Value * finalBlockRemainingBytes = iBuilder->CreateURem(remainingBytes, blockSize);
646
647    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(blocks, ConstantInt::get(size_ty, 0)), doFinalBlock, doFinalSegBlock);
648
649    iBuilder->SetInsertPoint(doFinalSegBlock);
650
651    createDoSegmentCall(self, blocks);
652
653    iBuilder->CreateBr(doFinalBlock);
654
655    iBuilder->SetInsertPoint(doFinalBlock);
656
657    createFinalBlockCall(self, finalBlockRemainingBytes);
658
659    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
660        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
661        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
662    }
663    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
664        iBuilder->CreateAtomicStoreRelease(producerPos, outbufProducerPtrs[i]);
665    }
666
667    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
668        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
669        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
670    }
671
672    iBuilder->CreatePThreadExitCall(Constant::getNullValue(voidPtrTy));
673    iBuilder->CreateRetVoid();
674
675    return threadFunc;
676
677}
678
679KernelBuilder::~KernelBuilder() {
680}
Note: See TracBrowser for help on using the repository browser.