source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5266

Last change on this file since 5266 was 5266, checked in by cameron, 3 years ago

Fixes

File size: 28.1 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <llvm/IR/Value.h>               // for Value
8#include <llvm/Support/ErrorHandling.h>  // for report_fatal_error
9#include <toolchain.h>                   // for BufferSegments, SegmentSize
10#include "IR_Gen/idisa_builder.h"        // for IDISA_Builder
11#include "kernels/streamset.h"           // for StreamSetBuffer
12#include "llvm/ADT/StringRef.h"          // for StringRef, operator==
13#include "llvm/IR/CallingConv.h"         // for ::C
14#include "llvm/IR/Constant.h"            // for Constant
15#include "llvm/IR/Constants.h"           // for ConstantInt
16#include "llvm/IR/Function.h"            // for Function, Function::arg_iter...
17#include "llvm/IR/Instructions.h"        // for LoadInst (ptr only), PHINode
18#include "llvm/Support/Compiler.h"       // for LLVM_UNLIKELY
19namespace llvm { class BasicBlock; }
20namespace llvm { class Module; }
21namespace llvm { class Type; }
22
23using namespace llvm;
24using namespace kernel;
25using namespace parabix;
26
27KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
28                             std::string kernelName,
29                             std::vector<Binding> stream_inputs,
30                             std::vector<Binding> stream_outputs,
31                             std::vector<Binding> scalar_parameters,
32                             std::vector<Binding> scalar_outputs,
33                             std::vector<Binding> internal_scalars)
34: KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars),
35mNoTerminateAttribute(false),
36mDoBlockUpdatesProducedItemCountsAttribute(false) {
37
38}
39
40unsigned KernelBuilder::addScalar(Type * type, const std::string & name) {
41    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
42        llvm::report_fatal_error("Cannot add kernel field " + name + " after kernel state finalized");
43    }
44    const auto index = mKernelFields.size();
45    mKernelMap.emplace(name, index);
46    mKernelFields.push_back(type);
47    return index;
48}
49
50void KernelBuilder::prepareKernel() {
51    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
52        llvm::report_fatal_error("Cannot prepare kernel after kernel state finalized");
53    }
54    unsigned blockSize = iBuilder->getBitBlockWidth();
55    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
56        std::string tmp;
57        raw_string_ostream out(tmp);
58        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
59            << mStreamSetInputs.size() << " input stream sets.";
60        throw std::runtime_error(out.str());
61    }
62    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
63        std::string tmp;
64        raw_string_ostream out(tmp);
65        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
66            << mStreamSetOutputs.size() << " output stream sets.";
67        throw std::runtime_error(out.str());
68    }
69    int streamSetNo = 0;
70    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
71        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
72             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
73        }
74        mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].name + structPtrSuffix});
75        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, streamSetNo);
76        addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + processedItemCountSuffix);
77        streamSetNo++;
78    }
79    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
80        mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].name + structPtrSuffix});
81        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, streamSetNo);
82        addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + producedItemCountSuffix);
83        streamSetNo++;
84    }
85    for (auto binding : mScalarInputs) {
86        addScalar(binding.type, binding.name);
87    }
88    for (auto binding : mScalarOutputs) {
89        addScalar(binding.type, binding.name);
90    }
91    for (auto binding : mInternalScalars) {
92        addScalar(binding.type, binding.name);
93    }
94    addScalar(iBuilder->getSizeTy(), blockNoScalar);
95    addScalar(iBuilder->getSizeTy(), logicalSegmentNoScalar);
96    addScalar(iBuilder->getInt1Ty(), terminationSignal);
97    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, mKernelName);
98}
99
100std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
101    auto saveModule = iBuilder->getModule();
102    auto savePoint = iBuilder->saveIP();
103    auto module = make_unique<Module>(mKernelName + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
104    iBuilder->setModule(module.get());
105    generateKernel(inputs, outputs);
106    iBuilder->setModule(saveModule);
107    iBuilder->restoreIP(savePoint);
108    return module;
109}
110
111void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
112    auto savePoint = iBuilder->saveIP();
113    Module * const m = iBuilder->getModule();
114    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
115    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
116    prepareKernel();            // possibly overridden by the KernelBuilder subtype
117    addKernelDeclarations(m);
118    generateInitMethod();       // possibly overridden by the KernelBuilder subtype
119    generateDoSegmentMethod();
120
121    // Implement the accumulator get functions
122    for (auto binding : mScalarOutputs) {
123        auto fnName = mKernelName + accumulator_infix + binding.name;
124        Function * accumFn = m->getFunction(fnName);
125        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, accumFn, 0));
126        Value * self = &*(accumFn->arg_begin());
127        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
128        Value * retVal = iBuilder->CreateLoad(ptr);
129        iBuilder->CreateRet(retVal);
130    }
131    iBuilder->restoreIP(savePoint);
132}
133
134// Default init method, possibly overridden if special init actions required.
135void KernelBuilder::generateInitMethod() const {
136    auto savePoint = iBuilder->saveIP();
137    Module * const m = iBuilder->getModule();
138    Function * initFunction = m->getFunction(mKernelName + init_suffix);
139    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", initFunction, 0));   
140    Function::arg_iterator args = initFunction->arg_begin();
141    Value * self = &*(args++);
142    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), self);
143    for (auto binding : mScalarInputs) {
144        Value * param = &*(args++);
145        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
146        iBuilder->CreateStore(param, ptr);
147    }
148    iBuilder->CreateRetVoid();
149    iBuilder->restoreIP(savePoint);
150}
151
152//  The default finalBlock method simply dispatches to the doBlock routine.
153void KernelBuilder::generateFinalBlockMethod() const {
154    auto savePoint = iBuilder->saveIP();
155    Module * m = iBuilder->getModule();
156    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
157    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
158    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
159    // Final Block arguments: self, remaining, then the standard DoBlock args.
160    Function::arg_iterator args = finalBlockFunction->arg_begin();
161    Value * self = &*(args++);
162    /* Skip "remaining" arg */ args++;
163    std::vector<Value *> doBlockArgs = {self};
164    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
165    iBuilder->CreateRetVoid();
166    iBuilder->restoreIP(savePoint);
167}
168
169// Note: this may be overridden to incorporate doBlock logic directly into
170// the doSegment function.
171void KernelBuilder::generateDoBlockLogic(Value * self, Value * /* blockNo */) const {
172    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
173    iBuilder->CreateCall(doBlockFunction, self);
174}
175
176// Note: this may be overridden to incorporate doBlock logic directly into
177// the doSegment function.
178void KernelBuilder::generateDoBlockMethod() const {
179    llvm::report_fatal_error(mKernelName + " DoBlock method called but not implemented");
180}
181
182
183//  The default doSegment method dispatches to the doBlock routine for
184//  each block of the given number of blocksToDo, and then updates counts.
185void KernelBuilder::generateDoSegmentMethod() const {
186    generateDoBlockMethod();    // must be implemented by the KernelBuilder subtype
187    generateFinalBlockMethod(); // possibly overridden by the KernelBuilder subtype
188
189 
190    auto savePoint = iBuilder->saveIP();
191    Module * m = iBuilder->getModule();
192    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
193    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), mKernelName + "_entry", doSegmentFunction, 0));
194    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
195    BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_strideLoopCond", doSegmentFunction, 0);
196    BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_strideLoopBody", doSegmentFunction, 0);
197    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_stridesDone", doSegmentFunction, 0);
198    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_doFinalBlock", doSegmentFunction, 0);
199    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_segmentDone", doSegmentFunction, 0);
200    BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_finalExit", doSegmentFunction, 0);
201    Type * const size_ty = iBuilder->getSizeTy();
202    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
203    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
204   
205    Function::arg_iterator args = doSegmentFunction->arg_begin();
206    Value * self = &*(args++);
207    Value * doFinal = &*(args++);
208   
209    std::vector<Value *> producerPos;
210    producerPos.push_back(&*(args++));
211    Value * availablePos = producerPos[0];
212    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
213        Value * p = &*(args++);
214        producerPos.push_back(p);
215        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
216    }
217    Value * processed = getProcessedItemCount(self, mStreamSetInputs[0].name);
218    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
219    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
220    iBuilder->CreateBr(strideLoopCond);
221
222    iBuilder->SetInsertPoint(strideLoopCond);
223    PHINode * stridesRemaining = iBuilder->CreatePHI(size_ty, 2, "stridesRemaining");
224    stridesRemaining->addIncoming(stridesToDo, entryBlock);
225    Value * notDone = iBuilder->CreateICmpUGT(stridesRemaining, ConstantInt::get(size_ty, 0));
226    iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
227
228    iBuilder->SetInsertPoint(strideLoopBody);
229    Value * blockNo = getScalarField(self, blockNoScalar);   
230
231    generateDoBlockLogic(self, blockNo);
232    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
233    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, ConstantInt::get(size_ty, 1)), strideLoopBody);
234    iBuilder->CreateBr(strideLoopCond);
235   
236    iBuilder->SetInsertPoint(stridesDone);
237    // Update counts for the full strides processed.
238    Value * segmentItemsProcessed = iBuilder->CreateMul(stridesToDo, stride);
239    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
240        Value * preProcessed = getProcessedItemCount(self, mStreamSetInputs[i].name);
241        setProcessedItemCount(self, mStreamSetInputs[i].name, iBuilder->CreateAdd(preProcessed, segmentItemsProcessed));
242    }
243    if (!mDoBlockUpdatesProducedItemCountsAttribute) {
244        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
245            Value * preProduced = getProducedItemCount(self, mStreamSetOutputs[i].name);
246           
247            setProducedItemCount(self, mStreamSetOutputs[i].name, iBuilder->CreateAdd(preProduced, segmentItemsProcessed));
248            //iBuilder->CallPrintInt(mKernelName + " produced ", iBuilder->CreateAdd(preProduced, segmentItemsProcessed));
249        }
250    }
251   
252    // Now conditionally perform the final block processing depending on the doFinal parameter.
253    iBuilder->CreateCondBr(doFinal, doFinalBlock, segmentDone);
254    iBuilder->SetInsertPoint(doFinalBlock);
255
256    Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(self, mStreamSetInputs[0].name));
257    //iBuilder->CallPrintInt(mKernelName + " remainingItems", remainingItems);
258   
259    createFinalBlockCall(self, remainingItems);
260    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
261        Value * preProcessed = getProcessedItemCount(self, mStreamSetInputs[i].name);
262        setProcessedItemCount(self, mStreamSetInputs[i].name, iBuilder->CreateAdd(preProcessed, remainingItems));
263    }
264    if (!mDoBlockUpdatesProducedItemCountsAttribute) {
265        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
266            Value * preProduced = getProducedItemCount(self, mStreamSetOutputs[i].name);
267            setProducedItemCount(self, mStreamSetOutputs[i].name, iBuilder->CreateAdd(preProduced, remainingItems));
268        }
269    }
270    setTerminationSignal(self);
271    iBuilder->CreateBr(segmentDone);
272   
273    iBuilder->SetInsertPoint(segmentDone);
274//#ifndef NDEBUG
275//    iBuilder->CallPrintInt(mKernelName + "_processed", processed);
276//#endif
277    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
278        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
279        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
280        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
281        iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
282    }
283    iBuilder->CreateBr(finalExit);
284    iBuilder->SetInsertPoint(finalExit);
285
286    iBuilder->CreateRetVoid();
287    iBuilder->restoreIP(savePoint);
288}
289
290
291ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
292    const auto f = mKernelMap.find(name);
293    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
294        llvm::report_fatal_error("Kernel does not contain scalar: " + name);
295    }
296    return iBuilder->getInt32(f->second);
297}
298
299unsigned KernelBuilder::getScalarCount() const {
300    return mKernelFields.size();
301}
302
303Value * KernelBuilder::getScalarFieldPtr(Value * self, const std::string & fieldName) const {
304    return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
305}
306
307Value * KernelBuilder::getScalarField(Value * self, const std::string & fieldName) const {
308    return iBuilder->CreateLoad(getScalarFieldPtr(self, fieldName));
309}
310
311void KernelBuilder::setScalarField(Value * self, const std::string & fieldName, Value * newFieldVal) const {
312    iBuilder->CreateStore(newFieldVal, getScalarFieldPtr(self, fieldName));
313}
314
315LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * self) const {
316    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
317    return iBuilder->CreateAtomicLoadAcquire(ptr);
318}
319
320Value * KernelBuilder::getProcessedItemCount(Value * self, const std::string & ssName) const {
321    return getScalarField(self, ssName + processedItemCountSuffix);
322}
323
324Value * KernelBuilder::getProducedItemCount(Value * self, const std::string & ssName) const {
325    return getScalarField(self, ssName + producedItemCountSuffix);
326}
327
328Value * KernelBuilder::getTerminationSignal(Value * self) const {
329    return getScalarField(self, terminationSignal);
330}
331
332void KernelBuilder::releaseLogicalSegmentNo(Value * self, Value * newCount) const {
333    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
334    iBuilder->CreateAtomicStoreRelease(newCount, ptr);
335}
336
337void KernelBuilder::setProcessedItemCount(Value * self, const std::string & ssName, Value * newCount) const {
338    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(ssName + processedItemCountSuffix)});
339    iBuilder->CreateStore(newCount, ptr);
340}
341
342void KernelBuilder::setProducedItemCount(Value * self, const std::string & ssName, Value * newCount) const {
343    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(ssName + producedItemCountSuffix)});
344    iBuilder->CreateStore(newCount, ptr);
345}
346
347void KernelBuilder::setTerminationSignal(Value * self) const {
348    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(terminationSignal)});
349    //iBuilder->CallPrintInt(mKernelName + " setTermination", getScalarIndex(terminationSignal));
350    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), ptr);
351}
352
353Value * KernelBuilder::getBlockNo(Value * self) const {
354    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
355    return iBuilder->CreateLoad(ptr);
356}
357
358void KernelBuilder::setBlockNo(Value * self, Value * value) const {
359    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
360    iBuilder->CreateStore(value, ptr);
361}
362
363
364Value * KernelBuilder::getParameter(Function * f, const std::string & paramName) const {
365    for (Function::arg_iterator argIter = f->arg_begin(), end = f->arg_end(); argIter != end; argIter++) {
366        Value * arg = &*argIter;
367        if (arg->getName() == paramName) return arg;
368    }
369    llvm::report_fatal_error("Method does not have parameter: " + paramName);
370}
371
372unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
373    const auto f = mStreamSetNameMap.find(name);
374    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
375        llvm::report_fatal_error("Kernel does not contain stream set: " + name);
376    }
377    return f->second;
378}
379
380Value * KernelBuilder::getStreamSetStructPtr(Value * self, const std::string & name) const {
381    return getScalarField(self, name + structPtrSuffix);
382}
383
384inline const StreamSetBuffer * KernelBuilder::getStreamSetBuffer(const std::string & name) const {
385    const unsigned structIdx = getStreamSetIndex(name);
386    if (structIdx < mStreamSetInputs.size()) {
387        return mStreamSetInputBuffers[structIdx];
388    } else {
389        return mStreamSetOutputBuffers[structIdx - mStreamSetInputs.size()];
390    }
391}
392
393Value * KernelBuilder::getStreamSetPtr(Value * self, const std::string & name, Value * blockNo) const {
394    return getStreamSetBuffer(name)->getStreamSetPtr(getStreamSetStructPtr(self, name), blockNo);
395}
396
397Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index) const {
398    return getStreamSetBuffer(name)->getStream(getStreamSetStructPtr(self, name), blockNo, index);
399}
400
401Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index1, Value * index2) const {
402    assert (index1->getType() == index2->getType());
403    return getStreamSetBuffer(name)->getStream(getStreamSetStructPtr(self, name), blockNo, index1, index2);
404}
405
406Value * KernelBuilder::getStreamView(Value * self, const std::string & name, Value * blockNo, Value * index) const {
407    return getStreamSetBuffer(name)->getStreamView(getStreamSetStructPtr(self, name), blockNo, index);
408}
409
410Value * KernelBuilder::getStreamView(llvm::Type * type, Value * self, const std::string & name, Value * blockNo, Value * index) const {
411    return getStreamSetBuffer(name)->getStreamView(type, getStreamSetStructPtr(self, name), blockNo, index);
412}
413
414void KernelBuilder::createInstance() {
415    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
416        llvm::report_fatal_error("Cannot create kernel instance before calling prepareKernel()");
417    }
418    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
419    Module * m = iBuilder->getModule();
420    std::vector<Value *> init_args = {mKernelInstance};
421    for (auto a : mInitialArguments) {
422        init_args.push_back(a);
423    }
424    for (auto b : mStreamSetInputBuffers) {
425        init_args.push_back(b->getStreamSetStructPtr());
426    }
427    for (auto b : mStreamSetOutputBuffers) {
428        init_args.push_back(b->getStreamSetStructPtr());
429    }
430    std::string initFnName = mKernelName + init_suffix;
431    Function * initMethod = m->getFunction(initFnName);
432    if (!initMethod) {
433        llvm::report_fatal_error("Cannot find " + initFnName);
434    }
435    iBuilder->CreateCall(initMethod, init_args);
436}
437
438Function * KernelBuilder::generateThreadFunction(const std::string & name) const {
439    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
440        llvm::report_fatal_error("Cannot generate thread function before calling prepareKernel()");
441    }
442    Module * m = iBuilder->getModule();
443    Type * const voidTy = iBuilder->getVoidTy();
444    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
445    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
446    Type * const int1ty = iBuilder->getInt1Ty();
447   
448    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
449    threadFunc->setCallingConv(CallingConv::C);
450    Function::arg_iterator args = threadFunc->arg_begin();
451   
452    Value * const arg = &*(args++);
453    arg->setName("args");
454   
455    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
456   
457    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
458   
459    std::vector<Value *> inbufProducerPtrs;
460    std::vector<Value *> inbufConsumerPtrs;
461    std::vector<Value *> outbufProducerPtrs;
462    std::vector<Value *> outbufConsumerPtrs;   
463    std::vector<Value *> endSignalPtrs;
464   
465    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
466        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
467        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
468        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(ssStructPtr));
469        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
470    }
471    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
472        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
473        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr));
474        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
475    }
476   
477    const unsigned segmentBlocks = codegen::SegmentSize;
478    const unsigned bufferSegments = codegen::BufferSegments;
479    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
480    Type * const size_ty = iBuilder->getSizeTy();
481   
482    Value * segSize = ConstantInt::get(size_ty, segmentSize);
483    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
484   
485    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
486    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
487   
488    BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
489    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
490    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
491   
492    iBuilder->CreateBr(outputCheckBlock);
493   
494    iBuilder->SetInsertPoint(outputCheckBlock);
495   
496    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
497    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
498        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(outbufProducerPtrs[i]);
499        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
500        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(outbufConsumerPtrs[i]);
501        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
502        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
503    }
504   
505    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock); 
506   
507    iBuilder->SetInsertPoint(inputCheckBlock); 
508   
509    Value * requiredSize = segSize;
510    if (mLookAheadPositions > 0) {
511        requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
512    }
513    waitCondTest = ConstantInt::get(int1ty, 1); 
514    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
515        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
516        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
517        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(inbufConsumerPtrs[i]);
518        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
519        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
520    }
521   
522    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
523   
524    iBuilder->SetInsertPoint(endSignalCheckBlock);
525   
526    LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]);
527    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
528        LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]);
529        iBuilder->CreateAnd(endSignal, endSignal_next);
530    }
531   
532    iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
533   
534    iBuilder->SetInsertPoint(doSegmentBlock);
535   
536    // needs positions
537    createDoSegmentCall({self, ConstantInt::getNullValue(iBuilder->getInt1Ty())});
538   
539    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
540        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
541        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
542    }
543   
544    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
545        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
546        iBuilder->CreateAtomicStoreRelease(produced, outbufProducerPtrs[i]);
547    }
548   
549    Value * earlyEndSignal = getTerminationSignal(self);
550    if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
551        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
552        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
553       
554        iBuilder->SetInsertPoint(earlyEndBlock);
555        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
556            Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
557            mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
558        }       
559    }
560    iBuilder->CreateBr(outputCheckBlock);
561   
562    iBuilder->SetInsertPoint(endBlock);
563    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
564    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
565    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
566   
567        // needs positions
568    createDoSegmentCall({self, ConstantInt::getAllOnesValue(iBuilder->getInt1Ty())});
569   
570   
571    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
572        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
573        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
574    }
575    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
576        iBuilder->CreateAtomicStoreRelease(producerPos, outbufProducerPtrs[i]);
577    }
578   
579    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
580        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
581        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
582    }
583   
584    iBuilder->CreatePThreadExitCall(Constant::getNullValue(voidPtrTy));
585    iBuilder->CreateRetVoid();
586   
587    return threadFunc;
588   
589}
590
591KernelBuilder::~KernelBuilder() {
592}
Note: See TracBrowser for help on using the repository browser.