source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5230

Last change on this file since 5230 was 5230, checked in by nmedfort, 3 years ago

Multi-threading support for PabloAST / PabloCompiler. Requires a unique LLVM Context / Module for each thread.

File size: 27.2 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <llvm/IR/Module.h>
8#include <llvm/IR/Type.h>
9#include <llvm/IR/Value.h>
10#include <llvm/Support/raw_ostream.h>
11#include <llvm/Support/ErrorHandling.h>
12#include <toolchain.h>
13
14using namespace llvm;
15using namespace kernel;
16
17KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
18                                 std::string kernelName,
19                                 std::vector<Binding> stream_inputs,
20                                 std::vector<Binding> stream_outputs,
21                                 std::vector<Binding> scalar_parameters,
22                                 std::vector<Binding> scalar_outputs,
23                                 std::vector<Binding> internal_scalars)
24: KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars) {
25
26}
27
28unsigned KernelBuilder::addScalar(Type * type, std::string name) {
29    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
30        llvm::report_fatal_error("Cannot add kernel field " + name + " after kernel state finalized");
31    }
32    const auto index = mKernelFields.size();
33    mKernelMap.emplace(name, index);
34    mKernelFields.push_back(type);
35    return index;
36}
37
38void KernelBuilder::prepareKernel() {
39    unsigned blockSize = iBuilder->getBitBlockWidth();
40    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
41        std::string tmp;
42        raw_string_ostream out(tmp);
43        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
44            << mStreamSetInputs.size() << " input stream sets.";
45        throw std::runtime_error(out.str());
46    }
47    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
48        std::string tmp;
49        raw_string_ostream out(tmp);
50        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
51            << mStreamSetOutputs.size() << " output stream sets.";
52        throw std::runtime_error(out.str());
53    }
54    int streamSetNo = 0;
55    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
56        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
57             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
58        }
59        mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].name + structPtrSuffix});
60        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, streamSetNo);
61        streamSetNo++;
62    }
63    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
64        mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].name + structPtrSuffix});
65        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, streamSetNo);
66        streamSetNo++;
67    }
68    for (auto binding : mScalarInputs) {
69        addScalar(binding.type, binding.name);
70    }
71    for (auto binding : mScalarOutputs) {
72        addScalar(binding.type, binding.name);
73    }
74    for (auto binding : mInternalScalars) {
75        addScalar(binding.type, binding.name);
76    }
77    addScalar(iBuilder->getSizeTy(), blockNoScalar);
78    addScalar(iBuilder->getSizeTy(), logicalSegmentNoScalar);
79    addScalar(iBuilder->getSizeTy(), processedItemCount);
80    addScalar(iBuilder->getSizeTy(), producedItemCount);
81    addScalar(iBuilder->getInt1Ty(), terminationSignal);
82    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, mKernelName);
83}
84
85std::unique_ptr<Module> KernelBuilder::createKernelModule(std::vector<StreamSetBuffer *> input_buffers, std::vector<StreamSetBuffer *> output_buffers) {
86    Module * saveModule = iBuilder->getModule();
87    auto savePoint = iBuilder->saveIP();
88    auto theModule = make_unique<Module>(mKernelName + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
89    Module * m = theModule.get();
90    iBuilder->setModule(m);
91    generateKernel(input_buffers, output_buffers);
92    iBuilder->setModule(saveModule);
93    iBuilder->restoreIP(savePoint);
94    return theModule;
95}
96
97void KernelBuilder::generateKernel(std::vector<StreamSetBuffer *> input_buffers, std::vector<StreamSetBuffer*> output_buffers) {
98    auto savePoint = iBuilder->saveIP();
99    Module * const m = iBuilder->getModule();
100    mStreamSetInputBuffers = input_buffers;
101    mStreamSetOutputBuffers = output_buffers;
102    prepareKernel();  // possibly overriden by the KernelBuilder subtype
103    addKernelDeclarations(m);
104    generateDoBlockMethod();     // must be implemented by the KernelBuilder subtype
105    generateFinalBlockMethod();  // possibly overriden by the KernelBuilder subtype
106    generateDoSegmentMethod();
107
108    // Implement the accumulator get functions
109    for (auto binding : mScalarOutputs) {
110        auto fnName = mKernelName + accumulator_infix + binding.name;
111        Function * accumFn = m->getFunction(fnName);
112        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, accumFn, 0));
113        Value * self = &*(accumFn->arg_begin());
114        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
115        Value * retVal = iBuilder->CreateLoad(ptr);
116        iBuilder->CreateRet(retVal);
117    }
118    // Implement the initializer function
119    Function * initFunction = m->getFunction(mKernelName + init_suffix);
120    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", initFunction, 0));
121   
122    Function::arg_iterator args = initFunction->arg_begin();
123    Value * self = &*(args++);
124    iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), self);
125    for (auto binding : mScalarInputs) {
126        Value * parm = &*(args++);
127        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
128        iBuilder->CreateStore(parm, ptr);
129    }
130    iBuilder->CreateRetVoid();
131    iBuilder->restoreIP(savePoint);
132}
133
134//  The default finalBlock method simply dispatches to the doBlock routine.
135void KernelBuilder::generateFinalBlockMethod() {
136    auto savePoint = iBuilder->saveIP();
137    Module * m = iBuilder->getModule();
138    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
139    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
140    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
141    // Final Block arguments: self, remaining, then the standard DoBlock args.
142    Function::arg_iterator args = finalBlockFunction->arg_begin();
143    Value * self = &*(args++);
144    /* Skip "remaining" arg */ args++;
145    std::vector<Value *> doBlockArgs = {self};
146    while (args != finalBlockFunction->arg_end()){
147        doBlockArgs.push_back(&*args++);
148    }
149    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
150    iBuilder->CreateRetVoid();
151    iBuilder->restoreIP(savePoint);
152}
153
154// Note: this may be overridden to incorporate doBlock logic directly into
155// the doSegment function.
156void KernelBuilder::generateDoBlockLogic(Value * self, Value * blockNo) {
157    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
158    iBuilder->CreateCall(doBlockFunction, {self});
159}
160
161//  The default doSegment method dispatches to the doBlock routine for
162//  each block of the given number of blocksToDo, and then updates counts.
163void KernelBuilder::generateDoSegmentMethod() {
164    auto savePoint = iBuilder->saveIP();
165    Module * m = iBuilder->getModule();
166    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
167    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
168    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
169    BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), "strideLoopCond", doSegmentFunction, 0);
170    BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), "strideLoopBody", doSegmentFunction, 0);
171    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", doSegmentFunction, 0);
172    BasicBlock * checkFinalStride = BasicBlock::Create(iBuilder->getContext(), "checkFinalStride", doSegmentFunction, 0);
173    BasicBlock * checkEndSignals = BasicBlock::Create(iBuilder->getContext(), "checkEndSignals", doSegmentFunction, 0);
174    BasicBlock * callFinalBlock = BasicBlock::Create(iBuilder->getContext(), "callFinalBlock", doSegmentFunction, 0);
175    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0);
176    BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), "finalExit", doSegmentFunction, 0);
177    Type * const size_ty = iBuilder->getSizeTy();
178    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
179    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
180   
181    Function::arg_iterator args = doSegmentFunction->arg_begin();
182    Value * self = &*(args++);
183    Value * blocksToDo = &*(args);
184   
185    std::vector<Value *> inbufProducerPtrs;
186    std::vector<Value *> endSignalPtrs;
187    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
188        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
189        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
190        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
191    }
192   
193    std::vector<Value *> producerPos;
194    /* Determine the actually available data examining all input stream sets. */
195    LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[0]);
196    producerPos.push_back(p);
197    Value * availablePos = producerPos[0];
198    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
199        LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
200        producerPos.push_back(p);
201        /* Set the available position to be the minimum of availablePos and producerPos. */
202        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
203    }
204    Value * processed = getProcessedItemCount(self);
205    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
206//#ifndef NDEBUG
207//    iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail);
208//#endif
209    Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
210    Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
211    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
212    Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo);
213    stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo);
214    //iBuilder->CallPrintInt(mKernelName + "_stridesAvail", stridesAvail);
215    iBuilder->CreateBr(strideLoopCond);
216
217    iBuilder->SetInsertPoint(strideLoopCond);
218    PHINode * stridesRemaining = iBuilder->CreatePHI(size_ty, 2, "stridesRemaining");
219    stridesRemaining->addIncoming(stridesToDo, entryBlock);
220    Value * notDone = iBuilder->CreateICmpUGT(stridesRemaining, ConstantInt::get(size_ty, 0));
221    iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
222
223    iBuilder->SetInsertPoint(strideLoopBody);
224    Value * blockNo = getScalarField(self, blockNoScalar);   
225
226    generateDoBlockLogic(self, blockNo);
227    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
228    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, ConstantInt::get(size_ty, 1)), strideLoopBody);
229    iBuilder->CreateBr(strideLoopCond);
230   
231    iBuilder->SetInsertPoint(stridesDone);
232    processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(stridesToDo, stride));
233    setProcessedItemCount(self, processed);
234    iBuilder->CreateCondBr(lessThanFullSegment, checkFinalStride, segmentDone);
235   
236    iBuilder->SetInsertPoint(checkFinalStride);
237   
238    /* We had less than a full segment of data; we may have reached the end of input
239       on one of the stream sets.  */
240   
241    Value * alreadyDone = getTerminationSignal(self);
242    iBuilder->CreateCondBr(alreadyDone, finalExit, checkEndSignals);
243   
244    iBuilder->SetInsertPoint(checkEndSignals);
245    Value * endOfInput = iBuilder->CreateLoad(endSignalPtrs[0]);
246    if (endSignalPtrs.size() > 1) {
247        /* If there is more than one input stream set, then we need to confirm that one of
248           them has both the endSignal set and the length = to availablePos. */
249        endOfInput = iBuilder->CreateAnd(endOfInput, iBuilder->CreateICmpEQ(availablePos, producerPos[0]));
250        for (unsigned i = 1; i < endSignalPtrs.size(); i++) {
251            Value * e = iBuilder->CreateAnd(iBuilder->CreateLoad(endSignalPtrs[i]), iBuilder->CreateICmpEQ(availablePos, producerPos[i]));
252            endOfInput = iBuilder->CreateOr(endOfInput, e);
253        }
254    }
255    iBuilder->CreateCondBr(endOfInput, callFinalBlock, segmentDone);
256   
257    iBuilder->SetInsertPoint(callFinalBlock);
258   
259    Value * remainingItems = iBuilder->CreateSub(availablePos, processed);
260    createFinalBlockCall(self, remainingItems);
261    setProcessedItemCount(self, availablePos);
262   
263    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
264        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
265        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
266    }
267    setTerminationSignal(self);
268    iBuilder->CreateBr(segmentDone);
269   
270    iBuilder->SetInsertPoint(segmentDone);
271    Value * produced = getProducedItemCount(self);
272//#ifndef NDEBUG
273//    iBuilder->CallPrintInt(mKernelName + "_produced", produced);
274//#endif
275    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
276        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
277        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
278        iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
279    }
280    iBuilder->CreateBr(finalExit);
281    iBuilder->SetInsertPoint(finalExit);
282
283    iBuilder->CreateRetVoid();
284    iBuilder->restoreIP(savePoint);
285}
286
287ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
288    const auto f = mKernelMap.find(name);
289    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
290        throw std::runtime_error("Kernel does not contain internal state: " + name);
291    }
292    return iBuilder->getInt32(f->second);
293}
294
295unsigned KernelBuilder::getScalarCount() const {
296    return mKernelFields.size();
297}
298
299Value * KernelBuilder::getScalarFieldPtr(Value * self, const std::string & fieldName) {
300    return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
301}
302
303Value * KernelBuilder::getScalarField(Value * self, std::string fieldName) {
304    return iBuilder->CreateLoad(getScalarFieldPtr(self, fieldName));
305}
306
307void KernelBuilder::setScalarField(Value * self, std::string fieldName, Value * newFieldVal) {
308    iBuilder->CreateStore(newFieldVal, getScalarFieldPtr(self, fieldName));
309}
310
311Value * KernelBuilder::acquireLogicalSegmentNo(Value * self) { 
312    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
313    LoadInst * segNo = iBuilder->CreateAtomicLoadAcquire(ptr);
314    return segNo;
315}
316
317Value * KernelBuilder::getProcessedItemCount(Value * self) { 
318    return getScalarField(self, processedItemCount);
319}
320
321Value * KernelBuilder::getProducedItemCount(Value * self) {
322    return getScalarField(self, producedItemCount);
323}
324
325Value * KernelBuilder::getTerminationSignal(Value * self) {
326    return getScalarField(self, terminationSignal);
327}
328
329void KernelBuilder::releaseLogicalSegmentNo(Value * self, Value * newCount) {
330    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
331    iBuilder->CreateAtomicStoreRelease(newCount, ptr);
332}
333
334void KernelBuilder::setProcessedItemCount(Value * self, Value * newCount) {
335    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(processedItemCount)});
336    iBuilder->CreateStore(newCount, ptr);
337}
338
339void KernelBuilder::setProducedItemCount(Value * self, Value * newCount) {
340    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(producedItemCount)});
341    iBuilder->CreateStore(newCount, ptr);
342}
343
344void KernelBuilder::setTerminationSignal(Value * self) {
345    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(terminationSignal)});
346    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), ptr);
347}
348                                     
349
350
351Value * KernelBuilder::getBlockNo(Value * self) {
352    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
353    LoadInst * blockNo = iBuilder->CreateLoad(ptr);
354    return blockNo;
355}
356
357void KernelBuilder::setBlockNo(Value * self, Value * newFieldVal) {
358    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
359    iBuilder->CreateStore(newFieldVal, ptr);
360}
361
362
363Value * KernelBuilder::getParameter(Function * f, std::string paramName) {
364    for (Function::arg_iterator argIter = f->arg_begin(), end = f->arg_end(); argIter != end; argIter++) {
365        Value * arg = &*argIter;
366        if (arg->getName() == paramName) return arg;
367    }
368    llvm::report_fatal_error("Method does not have parameter: " + paramName);
369}
370
371unsigned KernelBuilder::getStreamSetIndex(std::string name) {
372    const auto f = mStreamSetNameMap.find(name);
373    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
374        llvm::report_fatal_error("Kernel does not contain stream set: " + name);
375    }
376    return f->second;
377}
378
379size_t KernelBuilder::getStreamSetBufferSize(Value * self, std::string name) {
380    const unsigned index = getStreamSetIndex(name);
381    StreamSetBuffer * buf = nullptr;
382    if (index < mStreamSetInputs.size()) {
383        buf = mStreamSetInputBuffers[index];
384    } else {
385        buf = mStreamSetOutputBuffers[index - mStreamSetInputs.size()];
386    }
387    return buf->getBufferSize();
388}
389
390Value * KernelBuilder::getStreamSetStructPtr(Value * self, std::string name) {
391    return getScalarField(self, name + structPtrSuffix);
392}
393
394Value * KernelBuilder::getStreamSetBlockPtr(Value * self, std::string name, Value * blockNo) {
395    Value * const structPtr = getStreamSetStructPtr(self, name);
396    const unsigned index = getStreamSetIndex(name);
397    StreamSetBuffer * buf = nullptr;
398    if (index < mStreamSetInputs.size()) {
399        buf = mStreamSetInputBuffers[index];
400    } else {
401        buf = mStreamSetOutputBuffers[index - mStreamSetInputs.size()];
402    }   
403    return buf->getStreamSetBlockPointer(structPtr, blockNo);
404}
405
406void KernelBuilder::createInstance() {
407    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
408    Module * m = iBuilder->getModule();
409    std::vector<Value *> init_args = {mKernelInstance};
410    for (auto a : mInitialArguments) {
411        init_args.push_back(a);
412    }
413    for (auto b : mStreamSetInputBuffers) {
414        init_args.push_back(b->getStreamSetStructPtr());
415    }
416    for (auto b : mStreamSetOutputBuffers) {
417        init_args.push_back(b->getStreamSetStructPtr());
418    }
419    std::string initFnName = mKernelName + init_suffix;
420    Function * initMethod = m->getFunction(initFnName);
421    if (!initMethod) {
422        llvm::report_fatal_error("Cannot find " + initFnName);
423    }
424    iBuilder->CreateCall(initMethod, init_args);
425}
426
427Function * KernelBuilder::generateThreadFunction(std::string name){
428    Module * m = iBuilder->getModule();
429    Type * const voidTy = iBuilder->getVoidTy();
430    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
431    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
432    Type * const int1ty = iBuilder->getInt1Ty();
433
434    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
435    threadFunc->setCallingConv(CallingConv::C);
436    Function::arg_iterator args = threadFunc->arg_begin();
437
438    Value * const arg = &*(args++);
439    arg->setName("args");
440
441    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
442
443    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
444
445    std::vector<Value *> inbufProducerPtrs;
446    std::vector<Value *> inbufConsumerPtrs;
447    std::vector<Value *> outbufProducerPtrs;
448    std::vector<Value *> outbufConsumerPtrs;   
449    std::vector<Value *> endSignalPtrs;
450
451    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
452        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
453        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
454        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(ssStructPtr));
455        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
456    }
457    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
458        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
459        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr));
460        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
461    }
462
463    const unsigned segmentBlocks = codegen::SegmentSize;
464    const unsigned bufferSegments = codegen::BufferSegments;
465    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
466    Type * const size_ty = iBuilder->getSizeTy();
467
468    Value * segSize = ConstantInt::get(size_ty, segmentSize);
469    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
470    Value * segBlocks = ConstantInt::get(size_ty, segmentBlocks);
471   
472    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
473    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
474   
475    BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
476    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
477    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
478    BasicBlock * doFinalSegBlock = BasicBlock::Create(iBuilder->getContext(), "doFinalSeg", threadFunc, 0);
479    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), "doFinal", threadFunc, 0);
480
481    iBuilder->CreateBr(outputCheckBlock);
482
483    iBuilder->SetInsertPoint(outputCheckBlock);
484
485    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
486    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
487        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(outbufProducerPtrs[i]);
488        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
489        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(outbufConsumerPtrs[i]);
490        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
491        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
492    }
493   
494    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock); 
495
496    iBuilder->SetInsertPoint(inputCheckBlock); 
497
498    Value * requiredSize = segSize;
499    if (mLookAheadPositions > 0) {
500        requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
501    }
502    waitCondTest = ConstantInt::get(int1ty, 1); 
503    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
504        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
505        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
506        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(inbufConsumerPtrs[i]);
507        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
508        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
509    }
510
511    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
512   
513    iBuilder->SetInsertPoint(endSignalCheckBlock);
514   
515    LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]);
516    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
517        LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]);
518        iBuilder->CreateAnd(endSignal, endSignal_next);
519    }
520       
521    iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
522   
523    iBuilder->SetInsertPoint(doSegmentBlock);
524 
525    createDoSegmentCall(self, segBlocks);
526
527    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
528        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
529        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
530    }
531   
532    Value * produced = getProducedItemCount(self);
533    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
534        iBuilder->CreateAtomicStoreRelease(produced, outbufProducerPtrs[i]);
535    }
536   
537    Value * earlyEndSignal = getTerminationSignal(self);
538    if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
539        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
540        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
541
542        iBuilder->SetInsertPoint(earlyEndBlock);
543        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
544            Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
545            mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
546        }       
547    }
548    iBuilder->CreateBr(outputCheckBlock);
549     
550    iBuilder->SetInsertPoint(endBlock);
551    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
552    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
553    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
554    Value * blockSize = ConstantInt::get(size_ty, iBuilder->getBitBlockWidth());
555    Value * blocks = iBuilder->CreateUDiv(remainingBytes, blockSize);
556    Value * finalBlockRemainingBytes = iBuilder->CreateURem(remainingBytes, blockSize);
557
558    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(blocks, ConstantInt::get(size_ty, 0)), doFinalBlock, doFinalSegBlock);
559
560    iBuilder->SetInsertPoint(doFinalSegBlock);
561
562    createDoSegmentCall(self, blocks);
563
564    iBuilder->CreateBr(doFinalBlock);
565
566    iBuilder->SetInsertPoint(doFinalBlock);
567
568    createFinalBlockCall(self, finalBlockRemainingBytes);
569
570    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
571        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
572        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
573    }
574    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
575        iBuilder->CreateAtomicStoreRelease(producerPos, outbufProducerPtrs[i]);
576    }
577
578    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
579        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
580        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
581    }
582
583    Value * nullVal = Constant::getNullValue(voidPtrTy);
584    Function * pthreadExitFunc = m->getFunction("pthread_exit");
585    CallInst * exitThread = iBuilder->CreateCall(pthreadExitFunc, {nullVal}); 
586    exitThread->setDoesNotReturn();
587    iBuilder->CreateRetVoid();
588
589    return threadFunc;
590
591}
Note: See TracBrowser for help on using the repository browser.