source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5224

Last change on this file since 5224 was 5224, checked in by cameron, 3 years ago

Move responsibility for acquire/release of logical segment number into pipeline compilers.

File size: 27.2 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <llvm/IR/Module.h>
8#include <llvm/IR/Type.h>
9#include <llvm/IR/Value.h>
10#include <llvm/Support/raw_ostream.h>
11#include <llvm/IR/TypeBuilder.h>
12#include <llvm/Support/ErrorHandling.h>
13#include <toolchain.h>
14
15using namespace llvm;
16using namespace kernel;
17
18KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
19                                 std::string kernelName,
20                                 std::vector<Binding> stream_inputs,
21                                 std::vector<Binding> stream_outputs,
22                                 std::vector<Binding> scalar_parameters,
23                                 std::vector<Binding> scalar_outputs,
24                                 std::vector<Binding> internal_scalars) :
25    KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars) {}
26
27void KernelBuilder::addScalar(Type * t, std::string name) {
28    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
29        llvm::report_fatal_error("Illegal addition of kernel field after kernel state finalized: " + name);
30    }
31    unsigned index = mKernelFields.size();
32    mKernelFields.push_back(t);
33    mInternalStateNameMap.emplace(name, index);
34}
35
36void KernelBuilder::prepareKernel() {
37    unsigned blockSize = iBuilder->getBitBlockWidth();
38    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
39        std::string tmp;
40        raw_string_ostream out(tmp);
41        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
42            << mStreamSetInputs.size() << " input stream sets.";
43        throw std::runtime_error(out.str());
44    }
45    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
46        std::string tmp;
47        raw_string_ostream out(tmp);
48        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
49            << mStreamSetOutputs.size() << " output stream sets.";
50        throw std::runtime_error(out.str());
51    }
52    addScalar(iBuilder->getSizeTy(), blockNoScalar);
53    addScalar(iBuilder->getSizeTy(), logicalSegmentNoScalar);
54    addScalar(iBuilder->getSizeTy(), processedItemCount);
55    addScalar(iBuilder->getSizeTy(), producedItemCount);
56    addScalar(iBuilder->getInt1Ty(), terminationSignal);
57    int streamSetNo = 0;
58    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
59        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
60             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
61        }
62        mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].name + structPtrSuffix});
63        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, streamSetNo);
64        streamSetNo++;
65    }
66    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
67        mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].name + structPtrSuffix});
68        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, streamSetNo);
69        streamSetNo++;
70    }
71    for (auto binding : mScalarInputs) {
72        addScalar(binding.type, binding.name);
73    }
74    for (auto binding : mScalarOutputs) {
75        addScalar(binding.type, binding.name);
76    }
77    for (auto binding : mInternalScalars) {
78        addScalar(binding.type, binding.name);
79    }
80    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, mKernelName);
81}
82
83std::unique_ptr<Module> KernelBuilder::createKernelModule(std::vector<StreamSetBuffer *> input_buffers, std::vector<StreamSetBuffer *> output_buffers) {
84    Module * saveModule = iBuilder->getModule();
85    auto savePoint = iBuilder->saveIP();
86    auto theModule = make_unique<Module>(mKernelName + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
87    Module * m = theModule.get();
88    iBuilder->setModule(m);
89    generateKernel(input_buffers, output_buffers);
90    iBuilder->setModule(saveModule);
91    iBuilder->restoreIP(savePoint);
92    return theModule;
93}
94
95void KernelBuilder::generateKernel(std::vector<StreamSetBuffer *> input_buffers, std::vector<StreamSetBuffer*> output_buffers) {
96    auto savePoint = iBuilder->saveIP();
97    Module * m = iBuilder->getModule();
98    mStreamSetInputBuffers = input_buffers;
99    mStreamSetOutputBuffers = output_buffers;
100    prepareKernel();  // possibly overriden by the KernelBuilder subtype
101    KernelInterface::addKernelDeclarations(m);
102    generateDoBlockMethod();     // must be implemented by the KernelBuilder subtype
103    generateFinalBlockMethod();  // possibly overriden by the KernelBuilder subtype
104    generateDoSegmentMethod();
105
106    // Implement the accumulator get functions
107    for (auto binding : mScalarOutputs) {
108        auto fnName = mKernelName + accumulator_infix + binding.name;
109        Function * accumFn = m->getFunction(fnName);
110        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, accumFn, 0));
111        Value * self = &*(accumFn->arg_begin());
112        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
113        Value * retVal = iBuilder->CreateLoad(ptr);
114        iBuilder->CreateRet(retVal);
115    }
116    // Implement the initializer function
117    Function * initFunction = m->getFunction(mKernelName + init_suffix);
118    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", initFunction, 0));
119   
120    Function::arg_iterator args = initFunction->arg_begin();
121    Value * self = &*(args++);
122    iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), self);
123    for (auto binding : mScalarInputs) {
124        Value * parm = &*(args++);
125        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
126        iBuilder->CreateStore(parm, ptr);
127    }
128    iBuilder->CreateRetVoid();
129    iBuilder->restoreIP(savePoint);
130}
131
132//  The default finalBlock method simply dispatches to the doBlock routine.
133void KernelBuilder::generateFinalBlockMethod() {
134    auto savePoint = iBuilder->saveIP();
135    Module * m = iBuilder->getModule();
136    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
137    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
138    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
139    // Final Block arguments: self, remaining, then the standard DoBlock args.
140    Function::arg_iterator args = finalBlockFunction->arg_begin();
141    Value * self = &*(args++);
142    /* Skip "remaining" arg */ args++;
143    std::vector<Value *> doBlockArgs = {self};
144    while (args != finalBlockFunction->arg_end()){
145        doBlockArgs.push_back(&*args++);
146    }
147    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
148    iBuilder->CreateRetVoid();
149    iBuilder->restoreIP(savePoint);
150}
151
152// Note: this may be overridden to incorporate doBlock logic directly into
153// the doSegment function.
154void KernelBuilder::generateDoBlockLogic(Value * self, Value * blockNo) {
155    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
156    iBuilder->CreateCall(doBlockFunction, {self});
157}
158
159//  The default doSegment method dispatches to the doBlock routine for
160//  each block of the given number of blocksToDo, and then updates counts.
161void KernelBuilder::generateDoSegmentMethod() {
162    auto savePoint = iBuilder->saveIP();
163    Module * m = iBuilder->getModule();
164    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
165    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
166    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
167    BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), "strideLoopCond", doSegmentFunction, 0);
168    BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), "strideLoopBody", doSegmentFunction, 0);
169    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", doSegmentFunction, 0);
170    BasicBlock * checkFinalStride = BasicBlock::Create(iBuilder->getContext(), "checkFinalStride", doSegmentFunction, 0);
171    BasicBlock * checkEndSignals = BasicBlock::Create(iBuilder->getContext(), "checkEndSignals", doSegmentFunction, 0);
172    BasicBlock * callFinalBlock = BasicBlock::Create(iBuilder->getContext(), "callFinalBlock", doSegmentFunction, 0);
173    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0);
174    BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), "finalExit", doSegmentFunction, 0);
175    Type * const size_ty = iBuilder->getSizeTy();
176    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
177    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
178   
179    Function::arg_iterator args = doSegmentFunction->arg_begin();
180    Value * self = &*(args++);
181    Value * blocksToDo = &*(args);
182   
183    std::vector<Value *> inbufProducerPtrs;
184    std::vector<Value *> endSignalPtrs;
185    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
186        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
187        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
188        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
189    }
190   
191    std::vector<Value *> producerPos;
192    /* Determine the actually available data examining all input stream sets. */
193    LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[0]);
194    producerPos.push_back(p);
195    Value * availablePos = producerPos[0];
196    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
197        LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
198        producerPos.push_back(p);
199        /* Set the available position to be the minimum of availablePos and producerPos. */
200        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
201    }
202    Value * processed = getProcessedItemCount(self);
203    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
204//#ifndef NDEBUG
205//    iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail);
206//#endif
207    Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
208    Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
209    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
210    Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo);
211    stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo);
212    //iBuilder->CallPrintInt(mKernelName + "_stridesAvail", stridesAvail);
213    iBuilder->CreateBr(strideLoopCond);
214
215    iBuilder->SetInsertPoint(strideLoopCond);
216    PHINode * stridesRemaining = iBuilder->CreatePHI(size_ty, 2, "stridesRemaining");
217    stridesRemaining->addIncoming(stridesToDo, entryBlock);
218    Value * notDone = iBuilder->CreateICmpUGT(stridesRemaining, ConstantInt::get(size_ty, 0));
219    iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
220
221    iBuilder->SetInsertPoint(strideLoopBody);
222    Value * blockNo = getScalarField(self, blockNoScalar);   
223
224    generateDoBlockLogic(self, blockNo);
225    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
226    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, ConstantInt::get(size_ty, 1)), strideLoopBody);
227    iBuilder->CreateBr(strideLoopCond);
228   
229    iBuilder->SetInsertPoint(stridesDone);
230    processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(stridesToDo, stride));
231    setProcessedItemCount(self, processed);
232    iBuilder->CreateCondBr(lessThanFullSegment, checkFinalStride, segmentDone);
233   
234    iBuilder->SetInsertPoint(checkFinalStride);
235   
236    /* We had less than a full segment of data; we may have reached the end of input
237       on one of the stream sets.  */
238   
239    Value * alreadyDone = getTerminationSignal(self);
240    iBuilder->CreateCondBr(alreadyDone, finalExit, checkEndSignals);
241   
242    iBuilder->SetInsertPoint(checkEndSignals);
243    Value * endOfInput = iBuilder->CreateLoad(endSignalPtrs[0]);
244    if (endSignalPtrs.size() > 1) {
245        /* If there is more than one input stream set, then we need to confirm that one of
246           them has both the endSignal set and the length = to availablePos. */
247        endOfInput = iBuilder->CreateAnd(endOfInput, iBuilder->CreateICmpEQ(availablePos, producerPos[0]));
248        for (unsigned i = 1; i < endSignalPtrs.size(); i++) {
249            Value * e = iBuilder->CreateAnd(iBuilder->CreateLoad(endSignalPtrs[i]), iBuilder->CreateICmpEQ(availablePos, producerPos[i]));
250            endOfInput = iBuilder->CreateOr(endOfInput, e);
251        }
252    }
253    iBuilder->CreateCondBr(endOfInput, callFinalBlock, segmentDone);
254   
255    iBuilder->SetInsertPoint(callFinalBlock);
256   
257    Value * remainingItems = iBuilder->CreateSub(availablePos, processed);
258    createFinalBlockCall(self, remainingItems);
259    setProcessedItemCount(self, availablePos);
260   
261    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
262        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
263        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
264    }
265    setTerminationSignal(self);
266    iBuilder->CreateBr(segmentDone);
267   
268    iBuilder->SetInsertPoint(segmentDone);
269    Value * produced = getProducedItemCount(self);
270//#ifndef NDEBUG
271//    iBuilder->CallPrintInt(mKernelName + "_produced", produced);
272//#endif
273    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
274        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
275        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
276        iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
277    }
278    iBuilder->CreateBr(finalExit);
279    iBuilder->SetInsertPoint(finalExit);
280
281    iBuilder->CreateRetVoid();
282    iBuilder->restoreIP(savePoint);
283}
284
285Value * KernelBuilder::getScalarIndex(std::string fieldName) {
286    const auto f = mInternalStateNameMap.find(fieldName);
287    if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
288        throw std::runtime_error("Kernel does not contain internal state: " + fieldName);
289    }
290    return iBuilder->getInt32(f->second);
291}
292
293Value * KernelBuilder::getScalarFieldPtr(Value * self, std::string fieldName) {
294    return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
295}
296
297Value * KernelBuilder::getScalarField(Value * self, std::string fieldName) {
298    return iBuilder->CreateLoad(getScalarFieldPtr(self, fieldName));
299}
300
301void KernelBuilder::setScalarField(Value * self, std::string fieldName, Value * newFieldVal) {
302    iBuilder->CreateStore(newFieldVal, getScalarFieldPtr(self, fieldName));
303}
304
305Value * KernelBuilder::acquireLogicalSegmentNo(Value * self) { 
306    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
307    LoadInst * segNo = iBuilder->CreateAtomicLoadAcquire(ptr);
308    return segNo;
309}
310
311Value * KernelBuilder::getProcessedItemCount(Value * self) { 
312    return getScalarField(self, processedItemCount);
313}
314
315Value * KernelBuilder::getProducedItemCount(Value * self) {
316    return getScalarField(self, producedItemCount);
317}
318
319Value * KernelBuilder::getTerminationSignal(Value * self) {
320    return getScalarField(self, terminationSignal);
321}
322
323void KernelBuilder::releaseLogicalSegmentNo(Value * self, Value * newCount) {
324    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
325    iBuilder->CreateAtomicStoreRelease(newCount, ptr);
326}
327
328void KernelBuilder::setProcessedItemCount(Value * self, Value * newCount) {
329    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(processedItemCount)});
330    iBuilder->CreateStore(newCount, ptr);
331}
332
333void KernelBuilder::setProducedItemCount(Value * self, Value * newCount) {
334    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(producedItemCount)});
335    iBuilder->CreateStore(newCount, ptr);
336}
337
338void KernelBuilder::setTerminationSignal(Value * self) {
339    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(terminationSignal)});
340    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), ptr);
341}
342                                     
343
344
345Value * KernelBuilder::getBlockNo(Value * self) {
346    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
347    LoadInst * blockNo = iBuilder->CreateLoad(ptr);
348    return blockNo;
349}
350
351void KernelBuilder::setBlockNo(Value * self, Value * newFieldVal) {
352    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
353    iBuilder->CreateStore(newFieldVal, ptr);
354}
355
356
357Value * KernelBuilder::getParameter(Function * f, std::string paramName) {
358    for (Function::arg_iterator argIter = f->arg_begin(), end = f->arg_end(); argIter != end; argIter++) {
359        Value * arg = &*argIter;
360        if (arg->getName() == paramName) return arg;
361    }
362    llvm::report_fatal_error("Method does not have parameter: " + paramName);
363}
364
365unsigned KernelBuilder::getStreamSetIndex(std::string name) {
366    const auto f = mStreamSetNameMap.find(name);
367    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
368        llvm::report_fatal_error("Kernel does not contain stream set: " + name);
369    }
370    return f->second;
371}
372
373size_t KernelBuilder::getStreamSetBufferSize(Value * self, std::string name) {
374    const unsigned index = getStreamSetIndex(name);
375    StreamSetBuffer * buf = nullptr;
376    if (index < mStreamSetInputs.size()) {
377        buf = mStreamSetInputBuffers[index];
378    } else {
379        buf = mStreamSetOutputBuffers[index - mStreamSetInputs.size()];
380    }
381    return buf->getBufferSize();
382}
383
384Value * KernelBuilder::getStreamSetStructPtr(Value * self, std::string name) {
385    return getScalarField(self, name + structPtrSuffix);
386}
387
388Value * KernelBuilder::getStreamSetBlockPtr(Value * self, std::string name, Value * blockNo) {
389    Value * const structPtr = getStreamSetStructPtr(self, name);
390    const unsigned index = getStreamSetIndex(name);
391    StreamSetBuffer * buf = nullptr;
392    if (index < mStreamSetInputs.size()) {
393        buf = mStreamSetInputBuffers[index];
394    } else {
395        buf = mStreamSetOutputBuffers[index - mStreamSetInputs.size()];
396    }   
397    return buf->getStreamSetBlockPointer(structPtr, blockNo);
398}
399
400void KernelBuilder::createInstance() {
401    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
402    Module * m = iBuilder->getModule();
403    std::vector<Value *> init_args = {mKernelInstance};
404    for (auto a : mInitialArguments) {
405        init_args.push_back(a);
406    }
407    for (auto b : mStreamSetInputBuffers) {
408        init_args.push_back(b->getStreamSetStructPtr());
409    }
410    for (auto b : mStreamSetOutputBuffers) {
411        init_args.push_back(b->getStreamSetStructPtr());
412    }
413    std::string initFnName = mKernelName + init_suffix;
414    Function * initMethod = m->getFunction(initFnName);
415    if (!initMethod) {
416        llvm::report_fatal_error("Cannot find " + initFnName);
417    }
418    iBuilder->CreateCall(initMethod, init_args);
419}
420
421Function * KernelBuilder::generateThreadFunction(std::string name){
422    Module * m = iBuilder->getModule();
423    Type * const voidTy = Type::getVoidTy(m->getContext());
424    Type * const voidPtrTy = TypeBuilder<void *, false>::get(m->getContext());
425    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
426    Type * const int1ty = iBuilder->getInt1Ty();
427
428    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
429    threadFunc->setCallingConv(CallingConv::C);
430    Function::arg_iterator args = threadFunc->arg_begin();
431
432    Value * const arg = &*(args++);
433    arg->setName("args");
434
435    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
436
437    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
438
439    std::vector<Value *> inbufProducerPtrs;
440    std::vector<Value *> inbufConsumerPtrs;
441    std::vector<Value *> outbufProducerPtrs;
442    std::vector<Value *> outbufConsumerPtrs;   
443    std::vector<Value *> endSignalPtrs;
444
445    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
446        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
447        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
448        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(ssStructPtr));
449        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
450    }
451    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
452        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
453        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr));
454        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
455    }
456
457    const unsigned segmentBlocks = codegen::SegmentSize;
458    const unsigned bufferSegments = codegen::BufferSegments;
459    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
460    Type * const size_ty = iBuilder->getSizeTy();
461
462    Value * segSize = ConstantInt::get(size_ty, segmentSize);
463    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
464    Value * segBlocks = ConstantInt::get(size_ty, segmentBlocks);
465   
466    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
467    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
468   
469    BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
470    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
471    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
472    BasicBlock * doFinalSegBlock = BasicBlock::Create(iBuilder->getContext(), "doFinalSeg", threadFunc, 0);
473    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), "doFinal", threadFunc, 0);
474
475    iBuilder->CreateBr(outputCheckBlock);
476
477    iBuilder->SetInsertPoint(outputCheckBlock);
478
479    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
480    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
481        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(outbufProducerPtrs[i]);
482        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
483        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(outbufConsumerPtrs[i]);
484        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
485        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
486    }
487   
488    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock); 
489
490    iBuilder->SetInsertPoint(inputCheckBlock); 
491
492    Value * requiredSize = segSize;
493    if (mLookAheadPositions > 0) {
494        requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
495    }
496    waitCondTest = ConstantInt::get(int1ty, 1); 
497    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
498        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
499        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
500        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(inbufConsumerPtrs[i]);
501        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
502        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
503    }
504
505    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
506   
507    iBuilder->SetInsertPoint(endSignalCheckBlock);
508   
509    LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]);
510    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
511        LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]);
512        iBuilder->CreateAnd(endSignal, endSignal_next);
513    }
514       
515    iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
516   
517    iBuilder->SetInsertPoint(doSegmentBlock);
518 
519    createDoSegmentCall(self, segBlocks);
520
521    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
522        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
523        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
524    }
525   
526    Value * produced = getProducedItemCount(self);
527    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
528        iBuilder->CreateAtomicStoreRelease(produced, outbufProducerPtrs[i]);
529    }
530   
531    Value * earlyEndSignal = getTerminationSignal(self);
532    if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
533        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
534        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
535
536        iBuilder->SetInsertPoint(earlyEndBlock);
537        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
538            Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
539            mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
540        }       
541    }
542    iBuilder->CreateBr(outputCheckBlock);
543     
544    iBuilder->SetInsertPoint(endBlock);
545    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
546    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
547    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
548    Value * blockSize = ConstantInt::get(size_ty, iBuilder->getBitBlockWidth());
549    Value * blocks = iBuilder->CreateUDiv(remainingBytes, blockSize);
550    Value * finalBlockRemainingBytes = iBuilder->CreateURem(remainingBytes, blockSize);
551
552    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(blocks, ConstantInt::get(size_ty, 0)), doFinalBlock, doFinalSegBlock);
553
554    iBuilder->SetInsertPoint(doFinalSegBlock);
555
556    createDoSegmentCall(self, blocks);
557
558    iBuilder->CreateBr(doFinalBlock);
559
560    iBuilder->SetInsertPoint(doFinalBlock);
561
562    createFinalBlockCall(self, finalBlockRemainingBytes);
563
564    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
565        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
566        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
567    }
568    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
569        iBuilder->CreateAtomicStoreRelease(producerPos, outbufProducerPtrs[i]);
570    }
571
572    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
573        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
574        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
575    }
576
577    Value * nullVal = Constant::getNullValue(voidPtrTy);
578    Function * pthreadExitFunc = m->getFunction("pthread_exit");
579    CallInst * exitThread = iBuilder->CreateCall(pthreadExitFunc, {nullVal}); 
580    exitThread->setDoesNotReturn();
581    iBuilder->CreateRetVoid();
582
583    return threadFunc;
584
585}
Note: See TracBrowser for help on using the repository browser.