source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5183

Last change on this file since 5183 was 5183, checked in by cameron, 3 years ago

Kernels compute and check produced item counts

File size: 25.7 KB
RevLine 
[4924]1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
[5063]7#include <llvm/IR/Module.h>
8#include <llvm/IR/Type.h>
9#include <llvm/IR/Value.h>
10#include <llvm/Support/raw_ostream.h>
[5135]11#include <llvm/IR/TypeBuilder.h>
[5174]12#include <llvm/Support/ErrorHandling.h>
[5135]13#include <toolchain.h>
[4924]14
[4959]15using namespace llvm;
[5063]16using namespace kernel;
[4959]17
[5063]18KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
19                                 std::string kernelName,
20                                 std::vector<StreamSetBinding> stream_inputs,
21                                 std::vector<StreamSetBinding> stream_outputs,
22                                 std::vector<ScalarBinding> scalar_parameters,
23                                 std::vector<ScalarBinding> scalar_outputs,
24                                 std::vector<ScalarBinding> internal_scalars) :
[5076]25    KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars) {}
[4974]26
[5063]27void KernelBuilder::addScalar(Type * t, std::string scalarName) {
28    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
[5174]29        llvm::report_fatal_error("Illegal addition of kernel field after kernel state finalized: " + scalarName);
[4924]30    }
[5063]31    unsigned index = mKernelFields.size();
32    mKernelFields.push_back(t);
[5104]33    mInternalStateNameMap.emplace(scalarName, index);
[4924]34}
[4968]35
[5076]36void KernelBuilder::prepareKernel() {
[5142]37    unsigned blockSize = iBuilder->getBitBlockWidth();
[5133]38    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
[5174]39        llvm::report_fatal_error("Kernel preparation: Incorrect number of input buffers");
[5133]40    }
41    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
[5174]42        llvm::report_fatal_error("Kernel preparation: Incorrect number of output buffers");
[5133]43    }
[5106]44    addScalar(iBuilder->getSizeTy(), blockNoScalar);
[5174]45    addScalar(iBuilder->getSizeTy(), logicalSegmentNoScalar);
46    addScalar(iBuilder->getSizeTy(), processedItemCount);
47    addScalar(iBuilder->getSizeTy(), producedItemCount);
48    addScalar(iBuilder->getInt1Ty(), terminationSignal);
[5104]49    int streamSetNo = 0;
[5133]50    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
51        if (!(mStreamSetInputBuffers[i]->getBufferStreamSetType() == mStreamSetInputs[i].ssType)) {
[5174]52             llvm::report_fatal_error("Kernel preparation: Incorrect input buffer type");
[5133]53        }
[5174]54        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
[5142]55             errs() << "buffer size = " << mStreamSetInputBuffers[i]->getBufferSize() << "\n";
[5174]56             llvm::report_fatal_error("Kernel preparation: Buffer size too small.");
[5142]57        }
[5135]58        mScalarInputs.push_back(ScalarBinding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].ssName + basePtrSuffix});
[5133]59        mStreamSetNameMap.emplace(mStreamSetInputs[i].ssName, streamSetNo);
[5104]60        streamSetNo++;
[5086]61    }
[5133]62    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
63        if (!(mStreamSetOutputBuffers[i]->getBufferStreamSetType() == mStreamSetOutputs[i].ssType)) {
[5174]64             llvm::report_fatal_error("Kernel preparation: Incorrect output buffer type");
[5133]65        }
[5135]66        mScalarInputs.push_back(ScalarBinding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].ssName + basePtrSuffix});
[5133]67        mStreamSetNameMap.emplace(mStreamSetOutputs[i].ssName, streamSetNo);
[5104]68        streamSetNo++;
[5086]69    }
[5076]70    for (auto binding : mScalarInputs) {
71        addScalar(binding.scalarType, binding.scalarName);
72    }
73    for (auto binding : mScalarOutputs) {
74        addScalar(binding.scalarType, binding.scalarName);
75    }
76    for (auto binding : mInternalScalars) {
77        addScalar(binding.scalarType, binding.scalarName);
78    }
[5175]79    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, mKernelName);
[4970]80}
81
[5133]82std::unique_ptr<Module> KernelBuilder::createKernelModule(std::vector<StreamSetBuffer *> input_buffers, std::vector<StreamSetBuffer *> output_buffers) {
[5063]83    Module * saveModule = iBuilder->getModule();
84    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
[5175]85    std::unique_ptr<Module> theModule = make_unique<Module>(mKernelName + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
[5063]86    Module * m = theModule.get();
87    iBuilder->setModule(m);
[5133]88    generateKernel(input_buffers, output_buffers);
[5063]89    iBuilder->setModule(saveModule);
90    iBuilder->restoreIP(savePoint);
91    return theModule;
[4970]92}
93
[5133]94void KernelBuilder::generateKernel(std::vector<StreamSetBuffer *> input_buffers, std::vector<StreamSetBuffer*> output_buffers) {
[5074]95    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
[5063]96    Module * m = iBuilder->getModule();
[5133]97    mStreamSetInputBuffers = input_buffers;
98    mStreamSetOutputBuffers = output_buffers;
[5076]99    prepareKernel();  // possibly overriden by the KernelBuilder subtype
[5074]100    KernelInterface::addKernelDeclarations(m);
101    generateDoBlockMethod();     // must be implemented by the KernelBuilder subtype
102    generateFinalBlockMethod();  // possibly overriden by the KernelBuilder subtype
[5086]103    generateDoSegmentMethod();
[5074]104
[5063]105    // Implement the accumulator get functions
106    for (auto binding : mScalarOutputs) {
107        auto fnName = mKernelName + accumulator_infix + binding.scalarName;
108        Function * accumFn = m->getFunction(fnName);
109        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.scalarName, accumFn, 0));
110        Value * self = &*(accumFn->arg_begin());
111        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.scalarName)});
112        Value * retVal = iBuilder->CreateLoad(ptr);
113        iBuilder->CreateRet(retVal);
[4995]114    }
[5063]115    // Implement the initializer function
116    Function * initFunction = m->getFunction(mKernelName + init_suffix);
117    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", initFunction, 0));
118   
119    Function::arg_iterator args = initFunction->arg_begin();
[5051]120    Value * self = &*(args++);
121    iBuilder->CreateStore(Constant::getNullValue(mKernelStateType), self);
[5063]122    for (auto binding : mScalarInputs) {
[5051]123        Value * parm = &*(args++);
[5063]124        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.scalarName)});
125        iBuilder->CreateStore(parm, ptr);
[5051]126    }
127    iBuilder->CreateRetVoid();
[5063]128    iBuilder->restoreIP(savePoint);
[5051]129}
130
[5074]131//  The default finalBlock method simply dispatches to the doBlock routine.
132void KernelBuilder::generateFinalBlockMethod() {
[5063]133    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
[5074]134    Module * m = iBuilder->getModule();
[5063]135    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
136    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
137    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
138    // Final Block arguments: self, remaining, then the standard DoBlock args.
139    Function::arg_iterator args = finalBlockFunction->arg_begin();
140    Value * self = &*(args++);
141    /* Skip "remaining" arg */ args++;
142    std::vector<Value *> doBlockArgs = {self};
143    while (args != finalBlockFunction->arg_end()){
144        doBlockArgs.push_back(&*args++);
145    }
[5115]146    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
[5111]147    iBuilder->CreateRetVoid();
[5063]148    iBuilder->restoreIP(savePoint);
[4986]149}
[4924]150
[5174]151void KernelBuilder::generateDoBlockLogic(Value * self, Value * blockNo) {
152    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
153    iBuilder->CreateCall(doBlockFunction, {self});
154}
155
156//  The default doSegment method dispatches to the doBlock routine for
157//  each block of the given number of blocksToDo, and then updates counts.
[5086]158void KernelBuilder::generateDoSegmentMethod() {
159    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
160    Module * m = iBuilder->getModule();
161    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
162    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
163    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
[5165]164    BasicBlock * blockLoopCond = BasicBlock::Create(iBuilder->getContext(), "blockLoopCond", doSegmentFunction, 0);
165    BasicBlock * blockLoopBody = BasicBlock::Create(iBuilder->getContext(), "blockLoopBody", doSegmentFunction, 0);
[5086]166    BasicBlock * blocksDone = BasicBlock::Create(iBuilder->getContext(), "blocksDone", doSegmentFunction, 0);
[5165]167    Type * const size_ty = iBuilder->getSizeTy();
[5183]168    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
[5174]169    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
[5086]170   
171    Function::arg_iterator args = doSegmentFunction->arg_begin();
172    Value * self = &*(args++);
173    Value * blocksToDo = &*(args);
[5174]174    Value * segmentNo = getLogicalSegmentNo(self);
[5183]175    std::vector<Value *> inbufProducerPtrs;
176   
177    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
178        Value * basePtr = getStreamSetBasePtr(self, mStreamSetInputs[i].ssName);
179        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(basePtr));
180    }
181   
182    /* Determine the actually available data examining all input stream sets. */
183    LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[0], sizeof(size_t));
184    producerPos->setOrdering(AtomicOrdering::Acquire);
185    Value * availablePos = producerPos;
186    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
187        LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], sizeof(size_t));
188        producerPos->setOrdering(AtomicOrdering::Acquire);
189        /* Set the available position to be the minimum of availablePos and producerPos. */
190        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, producerPos), availablePos, producerPos);
191    }
192    Value * processed = getProcessedItemCount(self);
193    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
194#ifndef NDEBUG
195    iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail);
196#endif
197    Value * blocksAvail = iBuilder->CreateUDiv(itemsAvail, stride);
198    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
199    blocksToDo = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocksToDo, blocksAvail), blocksToDo, blocksAvail);
[5165]200    iBuilder->CreateBr(blockLoopCond);
201
202    iBuilder->SetInsertPoint(blockLoopCond);
203    PHINode * blocksRemaining = iBuilder->CreatePHI(size_ty, 2, "blocksRemaining");
[5111]204    blocksRemaining->addIncoming(blocksToDo, entryBlock);
[5165]205    Value * notDone = iBuilder->CreateICmpUGT(blocksRemaining, ConstantInt::get(size_ty, 0));
206    iBuilder->CreateCondBr(notDone, blockLoopBody, blocksDone);
207
208    iBuilder->SetInsertPoint(blockLoopBody);
209    Value * blockNo = getScalarField(self, blockNoScalar);   
[5174]210    generateDoBlockLogic(self, blockNo);
211    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
212    blocksRemaining->addIncoming(iBuilder->CreateSub(blocksRemaining, strideBlocks), blockLoopBody);
[5165]213    iBuilder->CreateBr(blockLoopCond);
[5111]214   
215    iBuilder->SetInsertPoint(blocksDone);
[5183]216    processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(blocksToDo, stride));
217    setProcessedItemCount(self, processed);
218    Value * produced = getProducedItemCount(self);
219#ifndef NDEBUG
220    iBuilder->CallPrintInt(mKernelName + "_produced", produced);
221#endif
222    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
223        Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
224        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(basePtr);
225        iBuilder->CreateAlignedStore(produced, producerPosPtr, sizeof(size_t))->setOrdering(AtomicOrdering::Release);
226    }
227
[5174]228    // Must be the last action, for synchronization.
229    setLogicalSegmentNo(self, iBuilder->CreateAdd(segmentNo, ConstantInt::get(size_ty, 1)));
230
[5111]231    iBuilder->CreateRetVoid();
[5086]232    iBuilder->restoreIP(savePoint);
233}
234
[5063]235Value * KernelBuilder::getScalarIndex(std::string fieldName) {
236    const auto f = mInternalStateNameMap.find(fieldName);
237    if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
[5174]238        llvm::report_fatal_error("Kernel does not contain internal state: " + fieldName);
[5000]239    }
[5104]240    return iBuilder->getInt32(f->second);
[4959]241}
[4924]242
[5109]243
244
[5063]245Value * KernelBuilder::getScalarField(Value * self, std::string fieldName) {
246    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
247    return iBuilder->CreateLoad(ptr);
[4924]248}
249
[5063]250void KernelBuilder::setScalarField(Value * self, std::string fieldName, Value * newFieldVal) {
251    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
252    iBuilder->CreateStore(newFieldVal, ptr);
[5008]253}
[5063]254
[5174]255Value * KernelBuilder::getLogicalSegmentNo(Value * self) { 
256    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
257    LoadInst * segNo = iBuilder->CreateAlignedLoad(ptr, sizeof(size_t));
[5175]258    segNo->setOrdering(AtomicOrdering::Acquire);
[5174]259    return segNo;
260}
261
262Value * KernelBuilder::getProcessedItemCount(Value * self) { 
263    return getScalarField(self, processedItemCount);
264}
265
266Value * KernelBuilder::getProducedItemCount(Value * self) {
267    return getScalarField(self, producedItemCount);
268}
269
270//  By default, kernels do not terminate early. 
271Value * KernelBuilder::getTerminationSignal(Value * self) {
272    return ConstantInt::getNullValue(iBuilder->getInt1Ty());
273}
274
275
276void KernelBuilder::setLogicalSegmentNo(Value * self, Value * newCount) {
277    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
[5175]278    iBuilder->CreateAlignedStore(newCount, ptr, sizeof(size_t))->setOrdering(AtomicOrdering::Release);
[5174]279}
280
281void KernelBuilder::setProcessedItemCount(Value * self, Value * newCount) {
282    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(processedItemCount)});
283    iBuilder->CreateStore(newCount, ptr);
284}
285
286void KernelBuilder::setProducedItemCount(Value * self, Value * newCount) {
287    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(producedItemCount)});
288    iBuilder->CreateStore(newCount, ptr);
289}
290
291void KernelBuilder::setTerminationSignal(Value * self, Value * newFieldVal) {
292    llvm::report_fatal_error("This kernel type does not support setTerminationSignal.");
293}
294
295
[5165]296Value * KernelBuilder::getBlockNo(Value * self) {
297    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
[5174]298    LoadInst * blockNo = iBuilder->CreateLoad(ptr);
[5165]299    return blockNo;
300}
[5063]301
[5165]302void KernelBuilder::setBlockNo(Value * self, Value * newFieldVal) {
303    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
[5174]304    iBuilder->CreateStore(newFieldVal, ptr);
[5165]305}
306
307
[5063]308Value * KernelBuilder::getParameter(Function * f, std::string paramName) {
309    for (Function::arg_iterator argIter = f->arg_begin(), end = f->arg_end(); argIter != end; argIter++) {
310        Value * arg = &*argIter;
311        if (arg->getName() == paramName) return arg;
[5051]312    }
[5174]313    llvm::report_fatal_error("Method does not have parameter: " + paramName);
[5051]314}
[5008]315
[5104]316unsigned KernelBuilder::getStreamSetIndex(std::string ssName) {
317    const auto f = mStreamSetNameMap.find(ssName);
318    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
[5174]319        llvm::report_fatal_error("Kernel does not contain stream set: " + ssName);
[5104]320    }
321    return f->second;
322}
[5063]323
[5109]324size_t KernelBuilder::getStreamSetBufferSize(Value * self, std::string ssName) {
325    unsigned ssIndex = getStreamSetIndex(ssName);
326    if (ssIndex < mStreamSetInputs.size()) {
[5133]327        return mStreamSetInputBuffers[ssIndex]->getBufferSize();
[5109]328    }
329    else {
[5133]330        return mStreamSetOutputBuffers[ssIndex - mStreamSetInputs.size()]->getBufferSize();
[5109]331    }
332}
333
[5104]334Value * KernelBuilder::getStreamSetBasePtr(Value * self, std::string ssName) {
335    return getScalarField(self, ssName + basePtrSuffix);
336}
337
338Value * KernelBuilder::getStreamSetBlockPtr(Value * self, std::string ssName, Value * blockNo) {
339    Value * basePtr = getStreamSetBasePtr(self, ssName);
340    unsigned ssIndex = getStreamSetIndex(ssName);
341    if (ssIndex < mStreamSetInputs.size()) {
[5133]342        return mStreamSetInputBuffers[ssIndex]->getStreamSetBlockPointer(basePtr, blockNo);
[5104]343    }
344    else {
[5133]345        return mStreamSetOutputBuffers[ssIndex - mStreamSetInputs.size()]->getStreamSetBlockPointer(basePtr, blockNo);
[5104]346    }
347}
348
[5133]349Value * KernelBuilder::createInstance(std::vector<Value *> args) {
350    Value * kernelInstance = iBuilder->CreateAlloca(mKernelStateType);
351    Module * m = iBuilder->getModule();
352    std::vector<Value *> init_args = {kernelInstance};
353    for (auto a : args) {
354        init_args.push_back(a);
355    }
356    for (auto b : mStreamSetInputBuffers) { 
[5135]357        init_args.push_back(b->getStreamSetStructPtr());
[5133]358    }
359    for (auto b : mStreamSetOutputBuffers) { 
[5135]360        init_args.push_back(b->getStreamSetStructPtr());
[5133]361    }
362    std::string initFnName = mKernelName + init_suffix;
363    Function * initMethod = m->getFunction(initFnName);
364    if (!initMethod) {
[5174]365        llvm::report_fatal_error("Cannot find " + initFnName);
[5133]366    }
367    iBuilder->CreateCall(initMethod, init_args);
368    return kernelInstance;
369}
[5104]370
[5135]371Function * KernelBuilder::generateThreadFunction(std::string name){
372    Module * m = iBuilder->getModule();
373    Type * const voidTy = Type::getVoidTy(m->getContext());
374    Type * const voidPtrTy = TypeBuilder<void *, false>::get(m->getContext());
375    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
376    Type * const int1ty = iBuilder->getInt1Ty();
[5104]377
[5135]378    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
379    threadFunc->setCallingConv(CallingConv::C);
380    Function::arg_iterator args = threadFunc->arg_begin();
[5104]381
[5135]382    Value * const arg = &*(args++);
383    arg->setName("args");
[5133]384
[5135]385    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
386
387    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
388
389    std::vector<Value *> inbufProducerPtrs;
390    std::vector<Value *> inbufConsumerPtrs;
391    std::vector<Value *> outbufProducerPtrs;
392    std::vector<Value *> outbufConsumerPtrs;   
393    std::vector<Value *> endSignalPtrs;
394
395    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
396        Value * basePtr = getStreamSetBasePtr(self, mStreamSetInputs[i].ssName);
397        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(basePtr));
[5174]398        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(basePtr));
[5135]399        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->hasEndOfInputPtr(basePtr));
400    }
401    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
402        Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
403        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(basePtr));
[5174]404        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(basePtr));
[5135]405    }
406
407    const unsigned segmentBlocks = codegen::SegmentSize;
408    const unsigned bufferSegments = codegen::BufferSegments;
409    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
410    Type * const size_ty = iBuilder->getSizeTy();
411
412    Value * segSize = ConstantInt::get(size_ty, segmentSize);
413    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
414    Value * segBlocks = ConstantInt::get(size_ty, segmentBlocks);
415   
416    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
417    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
418   
419    BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
420    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
421    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
422    BasicBlock * doFinalSegBlock = BasicBlock::Create(iBuilder->getContext(), "doFinalSeg", threadFunc, 0);
423    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), "doFinal", threadFunc, 0);
424
425    iBuilder->CreateBr(outputCheckBlock);
426
427    iBuilder->SetInsertPoint(outputCheckBlock);
428
429    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
430    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
[5174]431        LoadInst * producerPos = iBuilder->CreateAlignedLoad(outbufProducerPtrs[i], sizeof(size_t));
[5175]432        producerPos->setOrdering(AtomicOrdering::Acquire);
[5135]433        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
[5174]434        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(outbufConsumerPtrs[i], sizeof(size_t));
[5175]435        consumerPos->setOrdering(AtomicOrdering::Acquire);
[5135]436        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
437        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
438    }
439   
440    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock); 
441
442    iBuilder->SetInsertPoint(inputCheckBlock); 
443
[5174]444    Value * requiredSize = segSize;
445    if (mLookAheadPositions > 0) {
446        requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
447    }
[5135]448    waitCondTest = ConstantInt::get(int1ty, 1); 
449    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
[5174]450        LoadInst * producerPos = iBuilder->CreateAlignedLoad(inbufProducerPtrs[i], sizeof(size_t));
[5175]451        producerPos->setOrdering(AtomicOrdering::Acquire);
[5135]452        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
[5174]453        LoadInst * consumerPos = iBuilder->CreateAlignedLoad(inbufConsumerPtrs[i], sizeof(size_t));
[5175]454        consumerPos->setOrdering(AtomicOrdering::Acquire);
[5135]455        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
[5174]456        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
[5135]457    }
458
459    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
460   
461    iBuilder->SetInsertPoint(endSignalCheckBlock);
462   
[5174]463    LoadInst * endSignal = iBuilder->CreateAlignedLoad(endSignalPtrs[0], sizeof(size_t));
[5135]464    // iBuilder->CallPrintInt(name + ":endSignal", endSignal);
[5175]465    endSignal->setOrdering(AtomicOrdering::Acquire);
[5135]466    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
[5174]467        LoadInst * endSignal_next = iBuilder->CreateAlignedLoad(endSignalPtrs[i], sizeof(size_t));
[5175]468        endSignal_next->setOrdering(AtomicOrdering::Acquire);
[5135]469        iBuilder->CreateAnd(endSignal, endSignal_next);
470    }
471       
472    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(endSignal, ConstantInt::get(iBuilder->getInt8Ty(), 1)), endBlock, inputCheckBlock);
473   
474    iBuilder->SetInsertPoint(doSegmentBlock);
475 
476    createDoSegmentCall(self, segBlocks);
477
478    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
479        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
[5175]480        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], sizeof(size_t))->setOrdering(AtomicOrdering::Release);
[5135]481    }
[5174]482   
483    Value * produced = getProducedItemCount(self);
[5135]484    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
[5175]485        iBuilder->CreateAlignedStore(produced, outbufProducerPtrs[i], sizeof(size_t))->setOrdering(AtomicOrdering::Release);
[5135]486    }
487   
[5174]488    Value * earlyEndSignal = getTerminationSignal(self);
489    if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
490        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
491        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
492
493        iBuilder->SetInsertPoint(earlyEndBlock);
494        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
495            Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
496            mStreamSetOutputBuffers[i]->setEndOfInput(basePtr);
497        }       
498    }
[5135]499    iBuilder->CreateBr(outputCheckBlock);
500     
501    iBuilder->SetInsertPoint(endBlock);
502    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
503    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
504    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
505    Value * blockSize = ConstantInt::get(size_ty, iBuilder->getBitBlockWidth());
506    Value * blocks = iBuilder->CreateUDiv(remainingBytes, blockSize);
507    Value * finalBlockRemainingBytes = iBuilder->CreateURem(remainingBytes, blockSize);
508
509    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(blocks, ConstantInt::get(size_ty, 0)), doFinalBlock, doFinalSegBlock);
510
511    iBuilder->SetInsertPoint(doFinalSegBlock);
512
513    createDoSegmentCall(self, blocks);
514
515    iBuilder->CreateBr(doFinalBlock);
516
517    iBuilder->SetInsertPoint(doFinalBlock);
518
519    createFinalBlockCall(self, finalBlockRemainingBytes);
520
521    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
522        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
[5175]523        iBuilder->CreateAlignedStore(consumerPos, inbufConsumerPtrs[i], sizeof(size_t))->setOrdering(AtomicOrdering::Release);
[5135]524    }
525    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
[5175]526        iBuilder->CreateAlignedStore(producerPos, outbufProducerPtrs[i], sizeof(size_t))->setOrdering(AtomicOrdering::Release);
[5135]527    }
528
529    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
530        Value * basePtr = getStreamSetBasePtr(self, mStreamSetOutputs[i].ssName);
531        mStreamSetOutputBuffers[i]->setEndOfInput(basePtr);
532    }
533
534    Value * nullVal = Constant::getNullValue(voidPtrTy);
535    Function * pthreadExitFunc = m->getFunction("pthread_exit");
536    CallInst * exitThread = iBuilder->CreateCall(pthreadExitFunc, {nullVal}); 
537    exitThread->setDoesNotReturn();
538    iBuilder->CreateRetVoid();
539
540    return threadFunc;
541
542}
Note: See TracBrowser for help on using the repository browser.