source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5242

Last change on this file since 5242 was 5242, checked in by cameron, 2 years ago

Add pthread create, join and exit support into CBuilder

File size: 27.1 KB
Line 
/*
 *  Copyright (c) 2016 International Characters.
 *  This software is licensed to the public under the Open Software License 3.0.
 */
5
#include "kernel.h"

#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <llvm/IR/Module.h>
#include <llvm/IR/Type.h>
#include <llvm/IR/Value.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/ErrorHandling.h>
#include <toolchain.h>

using namespace llvm;
using namespace kernel;
16
17KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
18                                 std::string kernelName,
19                                 std::vector<Binding> stream_inputs,
20                                 std::vector<Binding> stream_outputs,
21                                 std::vector<Binding> scalar_parameters,
22                                 std::vector<Binding> scalar_outputs,
23                                 std::vector<Binding> internal_scalars)
24: KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars) {
25
26}
27
28unsigned KernelBuilder::addScalar(Type * type, std::string name) {
29    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
30        llvm::report_fatal_error("Cannot add kernel field " + name + " after kernel state finalized");
31    }
32    const auto index = mKernelFields.size();
33    mKernelMap.emplace(name, index);
34    mKernelFields.push_back(type);
35    return index;
36}
37
38void KernelBuilder::prepareKernel() {
39    unsigned blockSize = iBuilder->getBitBlockWidth();
40    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
41        std::string tmp;
42        raw_string_ostream out(tmp);
43        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
44            << mStreamSetInputs.size() << " input stream sets.";
45        throw std::runtime_error(out.str());
46    }
47    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
48        std::string tmp;
49        raw_string_ostream out(tmp);
50        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
51            << mStreamSetOutputs.size() << " output stream sets.";
52        throw std::runtime_error(out.str());
53    }
54    int streamSetNo = 0;
55    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
56        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
57             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
58        }
59        mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].name + structPtrSuffix});
60        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, streamSetNo);
61        streamSetNo++;
62    }
63    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
64        mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].name + structPtrSuffix});
65        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, streamSetNo);
66        streamSetNo++;
67    }
68    for (auto binding : mScalarInputs) {
69        addScalar(binding.type, binding.name);
70    }
71    for (auto binding : mScalarOutputs) {
72        addScalar(binding.type, binding.name);
73    }
74    for (auto binding : mInternalScalars) {
75        addScalar(binding.type, binding.name);
76    }
77    addScalar(iBuilder->getSizeTy(), blockNoScalar);
78    addScalar(iBuilder->getSizeTy(), logicalSegmentNoScalar);
79    addScalar(iBuilder->getSizeTy(), processedItemCount);
80    addScalar(iBuilder->getSizeTy(), producedItemCount);
81    addScalar(iBuilder->getInt1Ty(), terminationSignal);
82    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, mKernelName);
83}
84
85std::unique_ptr<Module> KernelBuilder::createKernelModule(std::vector<StreamSetBuffer *> input_buffers, std::vector<StreamSetBuffer *> output_buffers) {
86    Module * saveModule = iBuilder->getModule();
87    auto savePoint = iBuilder->saveIP();
88    auto theModule = make_unique<Module>(mKernelName + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
89    Module * m = theModule.get();
90    iBuilder->setModule(m);
91    generateKernel(input_buffers, output_buffers);
92    iBuilder->setModule(saveModule);
93    iBuilder->restoreIP(savePoint);
94    return theModule;
95}
96
97void KernelBuilder::generateKernel(std::vector<StreamSetBuffer *> input_buffers, std::vector<StreamSetBuffer*> output_buffers) {
98    auto savePoint = iBuilder->saveIP();
99    Module * const m = iBuilder->getModule();
100    mStreamSetInputBuffers = input_buffers;
101    mStreamSetOutputBuffers = output_buffers;
102    prepareKernel();  // possibly overriden by the KernelBuilder subtype
103    addKernelDeclarations(m);
104    generateDoBlockMethod();     // must be implemented by the KernelBuilder subtype
105    generateFinalBlockMethod();  // possibly overriden by the KernelBuilder subtype
106    generateDoSegmentMethod();
107
108    // Implement the accumulator get functions
109    for (auto binding : mScalarOutputs) {
110        auto fnName = mKernelName + accumulator_infix + binding.name;
111        Function * accumFn = m->getFunction(fnName);
112        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, accumFn, 0));
113        Value * self = &*(accumFn->arg_begin());
114        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
115        Value * retVal = iBuilder->CreateLoad(ptr);
116        iBuilder->CreateRet(retVal);
117    }
118    // Implement the initializer function
119    Function * initFunction = m->getFunction(mKernelName + init_suffix);
120    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", initFunction, 0));
121   
122    Function::arg_iterator args = initFunction->arg_begin();
123    Value * self = &*(args++);
124    initializeKernelState(self);
125    for (auto binding : mScalarInputs) {
126        Value * parm = &*(args++);
127        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
128        iBuilder->CreateStore(parm, ptr);
129    }
130    iBuilder->CreateRetVoid();
131    iBuilder->restoreIP(savePoint);
132}
133
134void KernelBuilder::initializeKernelState(Value * self) {
135    iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), self);
136}
137
138//  The default finalBlock method simply dispatches to the doBlock routine.
139void KernelBuilder::generateFinalBlockMethod() {
140    auto savePoint = iBuilder->saveIP();
141    Module * m = iBuilder->getModule();
142    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
143    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
144    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
145    // Final Block arguments: self, remaining, then the standard DoBlock args.
146    Function::arg_iterator args = finalBlockFunction->arg_begin();
147    Value * self = &*(args++);
148    /* Skip "remaining" arg */ args++;
149    std::vector<Value *> doBlockArgs = {self};
150    while (args != finalBlockFunction->arg_end()){
151        doBlockArgs.push_back(&*args++);
152    }
153    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
154    iBuilder->CreateRetVoid();
155    iBuilder->restoreIP(savePoint);
156}
157
158// Note: this may be overridden to incorporate doBlock logic directly into
159// the doSegment function.
160void KernelBuilder::generateDoBlockLogic(Value * self, Value * blockNo) {
161    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
162    iBuilder->CreateCall(doBlockFunction, {self});
163}
164
165//  The default doSegment method dispatches to the doBlock routine for
166//  each block of the given number of blocksToDo, and then updates counts.
167void KernelBuilder::generateDoSegmentMethod() {
168    auto savePoint = iBuilder->saveIP();
169    Module * m = iBuilder->getModule();
170    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
171    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
172    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
173    BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), "strideLoopCond", doSegmentFunction, 0);
174    BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), "strideLoopBody", doSegmentFunction, 0);
175    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", doSegmentFunction, 0);
176    BasicBlock * checkFinalStride = BasicBlock::Create(iBuilder->getContext(), "checkFinalStride", doSegmentFunction, 0);
177    BasicBlock * checkEndSignals = BasicBlock::Create(iBuilder->getContext(), "checkEndSignals", doSegmentFunction, 0);
178    BasicBlock * callFinalBlock = BasicBlock::Create(iBuilder->getContext(), "callFinalBlock", doSegmentFunction, 0);
179    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0);
180    BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), "finalExit", doSegmentFunction, 0);
181    Type * const size_ty = iBuilder->getSizeTy();
182    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
183    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
184   
185    Function::arg_iterator args = doSegmentFunction->arg_begin();
186    Value * self = &*(args++);
187    Value * blocksToDo = &*(args);
188   
189    std::vector<Value *> inbufProducerPtrs;
190    std::vector<Value *> endSignalPtrs;
191    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
192        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
193        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
194        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
195    }
196   
197    std::vector<Value *> producerPos;
198    /* Determine the actually available data examining all input stream sets. */
199    LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[0]);
200    producerPos.push_back(p);
201    Value * availablePos = producerPos[0];
202    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
203        LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
204        producerPos.push_back(p);
205        /* Set the available position to be the minimum of availablePos and producerPos. */
206        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
207    }
208    Value * processed = getProcessedItemCount(self);
209    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
210//#ifndef NDEBUG
211//    iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail);
212//#endif
213    Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
214    Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
215    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
216    Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo);
217    stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo);
218    //iBuilder->CallPrintInt(mKernelName + "_stridesAvail", stridesAvail);
219    iBuilder->CreateBr(strideLoopCond);
220
221    iBuilder->SetInsertPoint(strideLoopCond);
222    PHINode * stridesRemaining = iBuilder->CreatePHI(size_ty, 2, "stridesRemaining");
223    stridesRemaining->addIncoming(stridesToDo, entryBlock);
224    Value * notDone = iBuilder->CreateICmpUGT(stridesRemaining, ConstantInt::get(size_ty, 0));
225    iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
226
227    iBuilder->SetInsertPoint(strideLoopBody);
228    Value * blockNo = getScalarField(self, blockNoScalar);   
229
230    generateDoBlockLogic(self, blockNo);
231    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
232    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, ConstantInt::get(size_ty, 1)), strideLoopBody);
233    iBuilder->CreateBr(strideLoopCond);
234   
235    iBuilder->SetInsertPoint(stridesDone);
236    processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(stridesToDo, stride));
237    setProcessedItemCount(self, processed);
238    iBuilder->CreateCondBr(lessThanFullSegment, checkFinalStride, segmentDone);
239   
240    iBuilder->SetInsertPoint(checkFinalStride);
241   
242    /* We had less than a full segment of data; we may have reached the end of input
243       on one of the stream sets.  */
244   
245    Value * alreadyDone = getTerminationSignal(self);
246    iBuilder->CreateCondBr(alreadyDone, finalExit, checkEndSignals);
247   
248    iBuilder->SetInsertPoint(checkEndSignals);
249    Value * endOfInput = iBuilder->CreateLoad(endSignalPtrs[0]);
250    if (endSignalPtrs.size() > 1) {
251        /* If there is more than one input stream set, then we need to confirm that one of
252           them has both the endSignal set and the length = to availablePos. */
253        endOfInput = iBuilder->CreateAnd(endOfInput, iBuilder->CreateICmpEQ(availablePos, producerPos[0]));
254        for (unsigned i = 1; i < endSignalPtrs.size(); i++) {
255            Value * e = iBuilder->CreateAnd(iBuilder->CreateLoad(endSignalPtrs[i]), iBuilder->CreateICmpEQ(availablePos, producerPos[i]));
256            endOfInput = iBuilder->CreateOr(endOfInput, e);
257        }
258    }
259    iBuilder->CreateCondBr(endOfInput, callFinalBlock, segmentDone);
260   
261    iBuilder->SetInsertPoint(callFinalBlock);
262   
263    Value * remainingItems = iBuilder->CreateSub(availablePos, processed);
264    createFinalBlockCall(self, remainingItems);
265    setProcessedItemCount(self, availablePos);
266   
267    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
268        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
269        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
270    }
271    setTerminationSignal(self);
272    iBuilder->CreateBr(segmentDone);
273   
274    iBuilder->SetInsertPoint(segmentDone);
275    Value * produced = getProducedItemCount(self);
276//#ifndef NDEBUG
277//    iBuilder->CallPrintInt(mKernelName + "_produced", produced);
278//#endif
279    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
280        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
281        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
282        iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
283    }
284    iBuilder->CreateBr(finalExit);
285    iBuilder->SetInsertPoint(finalExit);
286
287    iBuilder->CreateRetVoid();
288    iBuilder->restoreIP(savePoint);
289}
290
291ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
292    const auto f = mKernelMap.find(name);
293    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
294        throw std::runtime_error("Kernel does not contain internal state: " + name);
295    }
296    return iBuilder->getInt32(f->second);
297}
298
299unsigned KernelBuilder::getScalarCount() const {
300    return mKernelFields.size();
301}
302
303Value * KernelBuilder::getScalarFieldPtr(Value * self, const std::string & fieldName) {
304    return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
305}
306
307Value * KernelBuilder::getScalarField(Value * self, std::string fieldName) {
308    return iBuilder->CreateLoad(getScalarFieldPtr(self, fieldName));
309}
310
311void KernelBuilder::setScalarField(Value * self, std::string fieldName, Value * newFieldVal) {
312    iBuilder->CreateStore(newFieldVal, getScalarFieldPtr(self, fieldName));
313}
314
315Value * KernelBuilder::acquireLogicalSegmentNo(Value * self) { 
316    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
317    LoadInst * segNo = iBuilder->CreateAtomicLoadAcquire(ptr);
318    return segNo;
319}
320
321Value * KernelBuilder::getProcessedItemCount(Value * self) { 
322    return getScalarField(self, processedItemCount);
323}
324
325Value * KernelBuilder::getProducedItemCount(Value * self) {
326    return getScalarField(self, producedItemCount);
327}
328
329Value * KernelBuilder::getTerminationSignal(Value * self) {
330    return getScalarField(self, terminationSignal);
331}
332
333void KernelBuilder::releaseLogicalSegmentNo(Value * self, Value * newCount) {
334    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
335    iBuilder->CreateAtomicStoreRelease(newCount, ptr);
336}
337
338void KernelBuilder::setProcessedItemCount(Value * self, Value * newCount) {
339    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(processedItemCount)});
340    iBuilder->CreateStore(newCount, ptr);
341}
342
343void KernelBuilder::setProducedItemCount(Value * self, Value * newCount) {
344    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(producedItemCount)});
345    iBuilder->CreateStore(newCount, ptr);
346}
347
348void KernelBuilder::setTerminationSignal(Value * self) {
349    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(terminationSignal)});
350    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), ptr);
351}
352                                     
353
354
355Value * KernelBuilder::getBlockNo(Value * self) {
356    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
357    LoadInst * blockNo = iBuilder->CreateLoad(ptr);
358    return blockNo;
359}
360
361void KernelBuilder::setBlockNo(Value * self, Value * newFieldVal) {
362    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
363    iBuilder->CreateStore(newFieldVal, ptr);
364}
365
366
367Value * KernelBuilder::getParameter(Function * f, std::string paramName) {
368    for (Function::arg_iterator argIter = f->arg_begin(), end = f->arg_end(); argIter != end; argIter++) {
369        Value * arg = &*argIter;
370        if (arg->getName() == paramName) return arg;
371    }
372    llvm::report_fatal_error("Method does not have parameter: " + paramName);
373}
374
375unsigned KernelBuilder::getStreamSetIndex(std::string name) {
376    const auto f = mStreamSetNameMap.find(name);
377    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
378        llvm::report_fatal_error("Kernel does not contain stream set: " + name);
379    }
380    return f->second;
381}
382
383size_t KernelBuilder::getStreamSetBufferSize(Value * self, std::string name) {
384    const unsigned index = getStreamSetIndex(name);
385    StreamSetBuffer * buf = nullptr;
386    if (index < mStreamSetInputs.size()) {
387        buf = mStreamSetInputBuffers[index];
388    } else {
389        buf = mStreamSetOutputBuffers[index - mStreamSetInputs.size()];
390    }
391    return buf->getBufferSize();
392}
393
394Value * KernelBuilder::getStreamSetStructPtr(Value * self, std::string name) {
395    return getScalarField(self, name + structPtrSuffix);
396}
397
398Value * KernelBuilder::getStreamSetBlockPtr(Value * self, std::string name, Value * blockNo) {
399    Value * const structPtr = getStreamSetStructPtr(self, name);
400    const unsigned index = getStreamSetIndex(name);
401    StreamSetBuffer * buf = nullptr;
402    if (index < mStreamSetInputs.size()) {
403        buf = mStreamSetInputBuffers[index];
404    } else {
405        buf = mStreamSetOutputBuffers[index - mStreamSetInputs.size()];
406    }   
407    return buf->getStreamSetBlockPointer(structPtr, blockNo);
408}
409
410void KernelBuilder::createInstance() {
411    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
412    Module * m = iBuilder->getModule();
413    std::vector<Value *> init_args = {mKernelInstance};
414    for (auto a : mInitialArguments) {
415        init_args.push_back(a);
416    }
417    for (auto b : mStreamSetInputBuffers) {
418        init_args.push_back(b->getStreamSetStructPtr());
419    }
420    for (auto b : mStreamSetOutputBuffers) {
421        init_args.push_back(b->getStreamSetStructPtr());
422    }
423    std::string initFnName = mKernelName + init_suffix;
424    Function * initMethod = m->getFunction(initFnName);
425    if (!initMethod) {
426        llvm::report_fatal_error("Cannot find " + initFnName);
427    }
428    iBuilder->CreateCall(initMethod, init_args);
429}
430
431Function * KernelBuilder::generateThreadFunction(std::string name){
432    Module * m = iBuilder->getModule();
433    Type * const voidTy = iBuilder->getVoidTy();
434    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
435    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
436    Type * const int1ty = iBuilder->getInt1Ty();
437
438    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
439    threadFunc->setCallingConv(CallingConv::C);
440    Function::arg_iterator args = threadFunc->arg_begin();
441
442    Value * const arg = &*(args++);
443    arg->setName("args");
444
445    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
446
447    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
448
449    std::vector<Value *> inbufProducerPtrs;
450    std::vector<Value *> inbufConsumerPtrs;
451    std::vector<Value *> outbufProducerPtrs;
452    std::vector<Value *> outbufConsumerPtrs;   
453    std::vector<Value *> endSignalPtrs;
454
455    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
456        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
457        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
458        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(ssStructPtr));
459        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
460    }
461    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
462        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
463        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr));
464        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
465    }
466
467    const unsigned segmentBlocks = codegen::SegmentSize;
468    const unsigned bufferSegments = codegen::BufferSegments;
469    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
470    Type * const size_ty = iBuilder->getSizeTy();
471
472    Value * segSize = ConstantInt::get(size_ty, segmentSize);
473    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
474    Value * segBlocks = ConstantInt::get(size_ty, segmentBlocks);
475   
476    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
477    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
478   
479    BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
480    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
481    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
482    BasicBlock * doFinalSegBlock = BasicBlock::Create(iBuilder->getContext(), "doFinalSeg", threadFunc, 0);
483    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), "doFinal", threadFunc, 0);
484
485    iBuilder->CreateBr(outputCheckBlock);
486
487    iBuilder->SetInsertPoint(outputCheckBlock);
488
489    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
490    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
491        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(outbufProducerPtrs[i]);
492        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
493        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(outbufConsumerPtrs[i]);
494        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
495        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
496    }
497   
498    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock); 
499
500    iBuilder->SetInsertPoint(inputCheckBlock); 
501
502    Value * requiredSize = segSize;
503    if (mLookAheadPositions > 0) {
504        requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
505    }
506    waitCondTest = ConstantInt::get(int1ty, 1); 
507    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
508        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
509        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
510        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(inbufConsumerPtrs[i]);
511        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
512        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
513    }
514
515    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
516   
517    iBuilder->SetInsertPoint(endSignalCheckBlock);
518   
519    LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]);
520    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
521        LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]);
522        iBuilder->CreateAnd(endSignal, endSignal_next);
523    }
524       
525    iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
526   
527    iBuilder->SetInsertPoint(doSegmentBlock);
528 
529    createDoSegmentCall(self, segBlocks);
530
531    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
532        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
533        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
534    }
535   
536    Value * produced = getProducedItemCount(self);
537    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
538        iBuilder->CreateAtomicStoreRelease(produced, outbufProducerPtrs[i]);
539    }
540   
541    Value * earlyEndSignal = getTerminationSignal(self);
542    if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
543        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
544        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
545
546        iBuilder->SetInsertPoint(earlyEndBlock);
547        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
548            Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
549            mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
550        }       
551    }
552    iBuilder->CreateBr(outputCheckBlock);
553     
554    iBuilder->SetInsertPoint(endBlock);
555    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
556    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
557    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
558    Value * blockSize = ConstantInt::get(size_ty, iBuilder->getBitBlockWidth());
559    Value * blocks = iBuilder->CreateUDiv(remainingBytes, blockSize);
560    Value * finalBlockRemainingBytes = iBuilder->CreateURem(remainingBytes, blockSize);
561
562    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(blocks, ConstantInt::get(size_ty, 0)), doFinalBlock, doFinalSegBlock);
563
564    iBuilder->SetInsertPoint(doFinalSegBlock);
565
566    createDoSegmentCall(self, blocks);
567
568    iBuilder->CreateBr(doFinalBlock);
569
570    iBuilder->SetInsertPoint(doFinalBlock);
571
572    createFinalBlockCall(self, finalBlockRemainingBytes);
573
574    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
575        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
576        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
577    }
578    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
579        iBuilder->CreateAtomicStoreRelease(producerPos, outbufProducerPtrs[i]);
580    }
581
582    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
583        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
584        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
585    }
586
587    iBuilder->CreatePThreadExitCall(Constant::getNullValue(voidPtrTy));
588    iBuilder->CreateRetVoid();
589
590    return threadFunc;
591
592}
Note: See TracBrowser for help on using the repository browser.