source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5188

Last change on this file since 5188 was 5188, checked in by cameron, 2 years ago

Do segment method now handles partial/final segment

File size: 27.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <llvm/IR/Module.h>
8#include <llvm/IR/Type.h>
9#include <llvm/IR/Value.h>
10#include <llvm/Support/raw_ostream.h>
11#include <llvm/IR/TypeBuilder.h>
12#include <llvm/Support/ErrorHandling.h>
13#include <toolchain.h>
14
15using namespace llvm;
16using namespace kernel;
17
18KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
19                                 std::string kernelName,
20                                 std::vector<StreamSetBinding> stream_inputs,
21                                 std::vector<StreamSetBinding> stream_outputs,
22                                 std::vector<ScalarBinding> scalar_parameters,
23                                 std::vector<ScalarBinding> scalar_outputs,
24                                 std::vector<ScalarBinding> internal_scalars) :
25    KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars) {}
26
27void KernelBuilder::addScalar(Type * t, std::string scalarName) {
28    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
29        llvm::report_fatal_error("Illegal addition of kernel field after kernel state finalized: " + scalarName);
30    }
31    unsigned index = mKernelFields.size();
32    mKernelFields.push_back(t);
33    mInternalStateNameMap.emplace(scalarName, index);
34}
35
36void KernelBuilder::prepareKernel() {
37    unsigned blockSize = iBuilder->getBitBlockWidth();
38    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
39        llvm::report_fatal_error("Kernel preparation: Incorrect number of input buffers");
40    }
41    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
42        llvm::report_fatal_error("Kernel preparation: Incorrect number of output buffers");
43    }
44    addScalar(iBuilder->getSizeTy(), blockNoScalar);
45    addScalar(iBuilder->getSizeTy(), logicalSegmentNoScalar);
46    addScalar(iBuilder->getSizeTy(), processedItemCount);
47    addScalar(iBuilder->getSizeTy(), producedItemCount);
48    addScalar(iBuilder->getInt1Ty(), terminationSignal);
49    int streamSetNo = 0;
50    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
51        if (!(mStreamSetInputBuffers[i]->getBufferStreamSetType() == mStreamSetInputs[i].ssType)) {
52             llvm::report_fatal_error("Kernel preparation: Incorrect input buffer type");
53        }
54        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
55             errs() << " buffer size = " << mStreamSetInputBuffers[i]->getBufferSize() << "\n";
56             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].ssName);
57        }
58        mScalarInputs.push_back(ScalarBinding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].ssName + structPtrSuffix});
59        mStreamSetNameMap.emplace(mStreamSetInputs[i].ssName, streamSetNo);
60        streamSetNo++;
61    }
62    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
63        if (!(mStreamSetOutputBuffers[i]->getBufferStreamSetType() == mStreamSetOutputs[i].ssType)) {
64             llvm::report_fatal_error("Kernel preparation: Incorrect output buffer type " + mStreamSetOutputs[i].ssName);
65        }
66        mScalarInputs.push_back(ScalarBinding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].ssName + structPtrSuffix});
67        mStreamSetNameMap.emplace(mStreamSetOutputs[i].ssName, streamSetNo);
68        streamSetNo++;
69    }
70    for (auto binding : mScalarInputs) {
71        addScalar(binding.scalarType, binding.scalarName);
72    }
73    for (auto binding : mScalarOutputs) {
74        addScalar(binding.scalarType, binding.scalarName);
75    }
76    for (auto binding : mInternalScalars) {
77        addScalar(binding.scalarType, binding.scalarName);
78    }
79    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, mKernelName);
80}
81
82std::unique_ptr<Module> KernelBuilder::createKernelModule(std::vector<StreamSetBuffer *> input_buffers, std::vector<StreamSetBuffer *> output_buffers) {
83    Module * saveModule = iBuilder->getModule();
84    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
85    std::unique_ptr<Module> theModule = make_unique<Module>(mKernelName + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
86    Module * m = theModule.get();
87    iBuilder->setModule(m);
88    generateKernel(input_buffers, output_buffers);
89    iBuilder->setModule(saveModule);
90    iBuilder->restoreIP(savePoint);
91    return theModule;
92}
93
94void KernelBuilder::generateKernel(std::vector<StreamSetBuffer *> input_buffers, std::vector<StreamSetBuffer*> output_buffers) {
95    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
96    Module * m = iBuilder->getModule();
97    mStreamSetInputBuffers = input_buffers;
98    mStreamSetOutputBuffers = output_buffers;
99    prepareKernel();  // possibly overriden by the KernelBuilder subtype
100    KernelInterface::addKernelDeclarations(m);
101    generateDoBlockMethod();     // must be implemented by the KernelBuilder subtype
102    generateFinalBlockMethod();  // possibly overriden by the KernelBuilder subtype
103    generateDoSegmentMethod();
104
105    // Implement the accumulator get functions
106    for (auto binding : mScalarOutputs) {
107        auto fnName = mKernelName + accumulator_infix + binding.scalarName;
108        Function * accumFn = m->getFunction(fnName);
109        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.scalarName, accumFn, 0));
110        Value * self = &*(accumFn->arg_begin());
111        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.scalarName)});
112        Value * retVal = iBuilder->CreateLoad(ptr);
113        iBuilder->CreateRet(retVal);
114    }
115    // Implement the initializer function
116    Function * initFunction = m->getFunction(mKernelName + init_suffix);
117    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", initFunction, 0));
118   
119    Function::arg_iterator args = initFunction->arg_begin();
120    Value * self = &*(args++);
121    iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), self);
122    for (auto binding : mScalarInputs) {
123        Value * parm = &*(args++);
124        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.scalarName)});
125        iBuilder->CreateStore(parm, ptr);
126    }
127    iBuilder->CreateRetVoid();
128    iBuilder->restoreIP(savePoint);
129}
130
131//  The default finalBlock method simply dispatches to the doBlock routine.
132void KernelBuilder::generateFinalBlockMethod() {
133    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
134    Module * m = iBuilder->getModule();
135    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
136    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
137    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
138    // Final Block arguments: self, remaining, then the standard DoBlock args.
139    Function::arg_iterator args = finalBlockFunction->arg_begin();
140    Value * self = &*(args++);
141    /* Skip "remaining" arg */ args++;
142    std::vector<Value *> doBlockArgs = {self};
143    while (args != finalBlockFunction->arg_end()){
144        doBlockArgs.push_back(&*args++);
145    }
146    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
147    iBuilder->CreateRetVoid();
148    iBuilder->restoreIP(savePoint);
149}
150
151// Note: this may be overridden to incorporate doBlock logic directly into
152// the doSegment function.
153void KernelBuilder::generateDoBlockLogic(Value * self, Value * blockNo) {
154    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
155    iBuilder->CreateCall(doBlockFunction, {self});
156}
157
158//  The default doSegment method dispatches to the doBlock routine for
159//  each block of the given number of blocksToDo, and then updates counts.
160void KernelBuilder::generateDoSegmentMethod() {
161    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
162    Module * m = iBuilder->getModule();
163    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
164    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
165    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
166    BasicBlock * blockLoopCond = BasicBlock::Create(iBuilder->getContext(), "blockLoopCond", doSegmentFunction, 0);
167    BasicBlock * blockLoopBody = BasicBlock::Create(iBuilder->getContext(), "blockLoopBody", doSegmentFunction, 0);
168    BasicBlock * blocksDone = BasicBlock::Create(iBuilder->getContext(), "blocksDone", doSegmentFunction, 0);
169    BasicBlock * checkFinalBlock = BasicBlock::Create(iBuilder->getContext(), "checkFinalBlock", doSegmentFunction, 0);
170    BasicBlock * callFinalBlock = BasicBlock::Create(iBuilder->getContext(), "callFinalBlock", doSegmentFunction, 0);
171    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0);
172    Type * const size_ty = iBuilder->getSizeTy();
173    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
174    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
175   
176    Function::arg_iterator args = doSegmentFunction->arg_begin();
177    Value * self = &*(args++);
178    Value * blocksToDo = &*(args);
179    Value * segmentNo = getLogicalSegmentNo(self);
180   
181    std::vector<Value *> inbufProducerPtrs;
182    std::vector<Value *> endSignalPtrs;
183    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
184        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].ssName);
185        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
186        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->hasEndOfInputPtr(ssStructPtr));
187    }
188   
189    std::vector<Value *> producerPos;
190    /* Determine the actually available data examining all input stream sets. */
191    LoadInst * p = iBuilder->CreateAlignedLoad(inbufProducerPtrs[0], sizeof(size_t));
192    p->setOrdering(AtomicOrdering::Acquire);
193    producerPos.push_back(p);
194    Value * availablePos = producerPos[0];
195    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
196        LoadInst * p = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], sizeof(size_t));
197        p->setOrdering(AtomicOrdering::Acquire);
198        producerPos.push_back(p);
199        /* Set the available position to be the minimum of availablePos and producerPos. */
200        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
201    }
202    Value * processed = getProcessedItemCount(self);
203    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
204#ifndef NDEBUG
205    iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail);
206#endif
207    Value * blocksAvail = iBuilder->CreateUDiv(itemsAvail, stride);
208    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
209    Value * lessThanFullSegment = iBuilder->CreateICmpULT(blocksAvail, blocksToDo);
210    blocksToDo = iBuilder->CreateSelect(lessThanFullSegment, blocksAvail, blocksToDo);
211    //iBuilder->CallPrintInt(mKernelName + "_blocksAvail", blocksAvail);
212    iBuilder->CreateBr(blockLoopCond);
213
214    iBuilder->SetInsertPoint(blockLoopCond);
215    PHINode * blocksRemaining = iBuilder->CreatePHI(size_ty, 2, "blocksRemaining");
216    blocksRemaining->addIncoming(blocksToDo, entryBlock);
217    Value * notDone = iBuilder->CreateICmpUGT(blocksRemaining, ConstantInt::get(size_ty, 0));
218    iBuilder->CreateCondBr(notDone, blockLoopBody, blocksDone);
219
220    iBuilder->SetInsertPoint(blockLoopBody);
221    Value * blockNo = getScalarField(self, blockNoScalar);   
222
223    generateDoBlockLogic(self, blockNo);
224    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
225    blocksRemaining->addIncoming(iBuilder->CreateSub(blocksRemaining, strideBlocks), blockLoopBody);
226    iBuilder->CreateBr(blockLoopCond);
227   
228    iBuilder->SetInsertPoint(blocksDone);
229    processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(blocksToDo, stride));
230    setProcessedItemCount(self, processed);
231    iBuilder->CreateCondBr(lessThanFullSegment, checkFinalBlock, segmentDone);
232   
233    iBuilder->SetInsertPoint(checkFinalBlock);
234   
235    /* We had less than a full segment of data; we may have reached the end of input
236       on one of the stream sets.  */
237   
238    Value * endOfInput = iBuilder->CreateLoad(endSignalPtrs[0]);
239    if (endSignalPtrs.size() > 1) {
240        /* If there is more than one input stream set, then we need to confirm that one of
241           them has both the endSignal set and the length = to availablePos. */
242        endOfInput = iBuilder->CreateAnd(endOfInput, iBuilder->CreateICmpEQ(availablePos, producerPos[0]));
243        for (unsigned i = 1; i < endSignalPtrs.size(); i++) {
244            Value * e = iBuilder->CreateAnd(iBuilder->CreateLoad(endSignalPtrs[i]), iBuilder->CreateICmpEQ(availablePos, producerPos[i]));
245            endOfInput = iBuilder->CreateOr(endOfInput, e);
246        }
247    }
248    iBuilder->CreateCondBr(endOfInput, callFinalBlock, segmentDone);
249   
250    iBuilder->SetInsertPoint(callFinalBlock);
251   
252    Value * remainingItems = iBuilder->CreateURem(availablePos, stride);
253    createFinalBlockCall(self, remainingItems);
254    setProcessedItemCount(self, availablePos);
255   
256    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
257        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName);
258        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
259    }
260   
261    iBuilder->CreateBr(segmentDone);
262   
263    iBuilder->SetInsertPoint(segmentDone);
264    Value * produced = getProducedItemCount(self);
265#ifndef NDEBUG
266    iBuilder->CallPrintInt(mKernelName + "_produced", produced);
267#endif
268    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
269        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName);
270        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
271        iBuilder->CreateAlignedStore(produced, producerPosPtr, sizeof(size_t))->setOrdering(AtomicOrdering::Release);
272    }
273
274    // Must be the last action, for synchronization.
275    setLogicalSegmentNo(self, iBuilder->CreateAdd(segmentNo, ConstantInt::get(size_ty, 1)));
276
277    iBuilder->CreateRetVoid();
278    iBuilder->restoreIP(savePoint);
279}
280
281Value * KernelBuilder::getScalarIndex(std::string fieldName) {
282    const auto f = mInternalStateNameMap.find(fieldName);
283    if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
284        llvm::report_fatal_error("Kernel does not contain internal state: " + fieldName);
285    }
286    return iBuilder->getInt32(f->second);
287}
288
289
290
291Value * KernelBuilder::getScalarField(Value * self, std::string fieldName) {
292    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
293    return iBuilder->CreateLoad(ptr);
294}
295
296void KernelBuilder::setScalarField(Value * self, std::string fieldName, Value * newFieldVal) {
297    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
298    iBuilder->CreateStore(newFieldVal, ptr);
299}
300
301Value * KernelBuilder::getLogicalSegmentNo(Value * self) { 
302    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
303    LoadInst * segNo = iBuilder->CreateAlignedLoad(ptr, sizeof(size_t));
304    segNo->setOrdering(AtomicOrdering::Acquire);
305    return segNo;
306}
307
308Value * KernelBuilder::getProcessedItemCount(Value * self) { 
309    return getScalarField(self, processedItemCount);
310}
311
312Value * KernelBuilder::getProducedItemCount(Value * self) {
313    return getScalarField(self, producedItemCount);
314}
315
316//  By default, kernels do not terminate early. 
317Value * KernelBuilder::getTerminationSignal(Value * self) {
318    return ConstantInt::getNullValue(iBuilder->getInt1Ty());
319}
320
321
322void KernelBuilder::setLogicalSegmentNo(Value * self, Value * newCount) {
323    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
324    iBuilder->CreateAlignedStore(newCount, ptr, sizeof(size_t))->setOrdering(AtomicOrdering::Release);
325}
326
327void KernelBuilder::setProcessedItemCount(Value * self, Value * newCount) {
328    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(processedItemCount)});
329    iBuilder->CreateStore(newCount, ptr);
330}
331
332void KernelBuilder::setProducedItemCount(Value * self, Value * newCount) {
333    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(producedItemCount)});
334    iBuilder->CreateStore(newCount, ptr);
335}
336
337void KernelBuilder::setTerminationSignal(Value * self, Value * newFieldVal) {
338    llvm::report_fatal_error("This kernel type does not support setTerminationSignal.");
339}
340
341
342Value * KernelBuilder::getBlockNo(Value * self) {
343    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
344    LoadInst * blockNo = iBuilder->CreateLoad(ptr);
345    return blockNo;
346}
347
348void KernelBuilder::setBlockNo(Value * self, Value * newFieldVal) {
349    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
350    iBuilder->CreateStore(newFieldVal, ptr);
351}
352
353
354Value * KernelBuilder::getParameter(Function * f, std::string paramName) {
355    for (Function::arg_iterator argIter = f->arg_begin(), end = f->arg_end(); argIter != end; argIter++) {
356        Value * arg = &*argIter;
357        if (arg->getName() == paramName) return arg;
358    }
359    llvm::report_fatal_error("Method does not have parameter: " + paramName);
360}
361
362unsigned KernelBuilder::getStreamSetIndex(std::string ssName) {
363    const auto f = mStreamSetNameMap.find(ssName);
364    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
365        llvm::report_fatal_error("Kernel does not contain stream set: " + ssName);
366    }
367    return f->second;
368}
369
370size_t KernelBuilder::getStreamSetBufferSize(Value * self, std::string ssName) {
371    unsigned ssIndex = getStreamSetIndex(ssName);
372    if (ssIndex < mStreamSetInputs.size()) {
373        return mStreamSetInputBuffers[ssIndex]->getBufferSize();
374    }
375    else {
376        return mStreamSetOutputBuffers[ssIndex - mStreamSetInputs.size()]->getBufferSize();
377    }
378}
379
380Value * KernelBuilder::getStreamSetStructPtr(Value * self, std::string ssName) {
381    return getScalarField(self, ssName + structPtrSuffix);
382}
383
384Value * KernelBuilder::getStreamSetBlockPtr(Value * self, std::string ssName, Value * blockNo) {
385    Value * ssStructPtr = getStreamSetStructPtr(self, ssName);
386    unsigned ssIndex = getStreamSetIndex(ssName);
387    if (ssIndex < mStreamSetInputs.size()) {
388        return mStreamSetInputBuffers[ssIndex]->getStreamSetBlockPointer(ssStructPtr, blockNo);
389    }
390    else {
391        return mStreamSetOutputBuffers[ssIndex - mStreamSetInputs.size()]->getStreamSetBlockPointer(ssStructPtr, blockNo);
392    }
393}
394
395Value * KernelBuilder::createInstance(std::vector<Value *> args) {
396    Value * kernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
397    Module * m = iBuilder->getModule();
398    std::vector<Value *> init_args = {kernelInstance};
399    for (auto a : args) {
400        init_args.push_back(a);
401    }
402    for (auto b : mStreamSetInputBuffers) { 
403        init_args.push_back(b->getStreamSetStructPtr());
404    }
405    for (auto b : mStreamSetOutputBuffers) { 
406        init_args.push_back(b->getStreamSetStructPtr());
407    }
408    std::string initFnName = mKernelName + init_suffix;
409    Function * initMethod = m->getFunction(initFnName);
410    if (!initMethod) {
411        llvm::report_fatal_error("Cannot find " + initFnName);
412    }
413    iBuilder->CreateCall(initMethod, init_args);
414    return kernelInstance;
415}
416
417Function * KernelBuilder::generateThreadFunction(std::string name){
418    Module * m = iBuilder->getModule();
419    Type * const voidTy = Type::getVoidTy(m->getContext());
420    Type * const voidPtrTy = TypeBuilder<void *, false>::get(m->getContext());
421    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
422    Type * const int1ty = iBuilder->getInt1Ty();
423
424    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
425    threadFunc->setCallingConv(CallingConv::C);
426    Function::arg_iterator args = threadFunc->arg_begin();
427
428    Value * const arg = &*(args++);
429    arg->setName("args");
430
431    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
432
433    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
434
435    std::vector<Value *> inbufProducerPtrs;
436    std::vector<Value *> inbufConsumerPtrs;
437    std::vector<Value *> outbufProducerPtrs;
438    std::vector<Value *> outbufConsumerPtrs;   
439    std::vector<Value *> endSignalPtrs;
440
441    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
442        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].ssName);
443        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
444        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(ssStructPtr));
445        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->hasEndOfInputPtr(ssStructPtr));
446    }
447    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
448        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName);
449        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr));
450        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
451    }
452
453    const unsigned segmentBlocks = codegen::SegmentSize;
454    const unsigned bufferSegments = codegen::BufferSegments;
455    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
456    Type * const size_ty = iBuilder->getSizeTy();
457
458    Value * segSize = ConstantInt::get(size_ty, segmentSize);
459    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
460    Value * segBlocks = ConstantInt::get(size_ty, segmentBlocks);
461   
462    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
463    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
464   
465    BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
466    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
467    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
468    BasicBlock * doFinalSegBlock = BasicBlock::Create(iBuilder->getContext(), "doFinalSeg", threadFunc, 0);
469    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), "doFinal", threadFunc, 0);
470
471    iBuilder->CreateBr(outputCheckBlock);
472
473    iBuilder->SetInsertPoint(outputCheckBlock);
474
475    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
476    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
477        LoadInst * producerPos = iBuilder->CreateAlignedLoad(outbufProducerPtrs[i], sizeof(size_t));
478        producerPos->setOrdering(AtomicOrdering::Acquire);
479        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
480        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(outbufConsumerPtrs[i], sizeof(size_t));
481        consumerPos->setOrdering(AtomicOrdering::Acquire);
482        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
483        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
484    }
485   
486    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock); 
487
488    iBuilder->SetInsertPoint(inputCheckBlock); 
489
490    Value * requiredSize = segSize;
491    if (mLookAheadPositions > 0) {
492        requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
493    }
494    waitCondTest = ConstantInt::get(int1ty, 1); 
495    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
496        LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], sizeof(size_t));
497        producerPos->setOrdering(AtomicOrdering::Acquire);
498        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
499        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(inbufConsumerPtrs[i], sizeof(size_t));
500        consumerPos->setOrdering(AtomicOrdering::Acquire);
501        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
502        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
503    }
504
505    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
506   
507    iBuilder->SetInsertPoint(endSignalCheckBlock);
508   
509    LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]);
510    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
511        LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]);
512        iBuilder->CreateAnd(endSignal, endSignal_next);
513    }
514       
515    iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
516   
517    iBuilder->SetInsertPoint(doSegmentBlock);
518 
519    createDoSegmentCall(self, segBlocks);
520
521    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
522        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
523        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], sizeof(size_t))->setOrdering(AtomicOrdering::Release);
524    }
525   
526    Value * produced = getProducedItemCount(self);
527    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
528        iBuilder->CreateAlignedStore(produced, outbufProducerPtrs[i], sizeof(size_t))->setOrdering(AtomicOrdering::Release);
529    }
530   
531    Value * earlyEndSignal = getTerminationSignal(self);
532    if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
533        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
534        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
535
536        iBuilder->SetInsertPoint(earlyEndBlock);
537        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
538            Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName);
539            mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
540        }       
541    }
542    iBuilder->CreateBr(outputCheckBlock);
543     
544    iBuilder->SetInsertPoint(endBlock);
545    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
546    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
547    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
548    Value * blockSize = ConstantInt::get(size_ty, iBuilder->getBitBlockWidth());
549    Value * blocks = iBuilder->CreateUDiv(remainingBytes, blockSize);
550    Value * finalBlockRemainingBytes = iBuilder->CreateURem(remainingBytes, blockSize);
551
552    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(blocks, ConstantInt::get(size_ty, 0)), doFinalBlock, doFinalSegBlock);
553
554    iBuilder->SetInsertPoint(doFinalSegBlock);
555
556    createDoSegmentCall(self, blocks);
557
558    iBuilder->CreateBr(doFinalBlock);
559
560    iBuilder->SetInsertPoint(doFinalBlock);
561
562    createFinalBlockCall(self, finalBlockRemainingBytes);
563
564    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
565        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
566        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], sizeof(size_t))->setOrdering(AtomicOrdering::Release);
567    }
568    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
569        iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], sizeof(size_t))->setOrdering(AtomicOrdering::Release);
570    }
571
572    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
573        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].ssName);
574        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
575    }
576
577    Value * nullVal = Constant::getNullValue(voidPtrTy);
578    Function * pthreadExitFunc = m->getFunction("pthread_exit");
579    CallInst * exitThread = iBuilder->CreateCall(pthreadExitFunc, {nullVal}); 
580    exitThread->setDoesNotReturn();
581    iBuilder->CreateRetVoid();
582
583    return threadFunc;
584
585}
Note: See TracBrowser for help on using the repository browser.