source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5263

Last change on this file since 5263 was 5263, checked in by cameron, 11 months ago

New doSegment partial progress

File size: 27.6 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <llvm/IR/Value.h>               // for Value
8#include <llvm/Support/ErrorHandling.h>  // for report_fatal_error
9#include <toolchain.h>                   // for BufferSegments, SegmentSize
10#include "IR_Gen/idisa_builder.h"        // for IDISA_Builder
11#include "kernels/streamset.h"           // for StreamSetBuffer
12#include "llvm/ADT/StringRef.h"          // for StringRef, operator==
13#include "llvm/IR/CallingConv.h"         // for ::C
14#include "llvm/IR/Constant.h"            // for Constant
15#include "llvm/IR/Constants.h"           // for ConstantInt
16#include "llvm/IR/Function.h"            // for Function, Function::arg_iter...
17#include "llvm/IR/Instructions.h"        // for LoadInst (ptr only), PHINode
18#include "llvm/Support/Compiler.h"       // for LLVM_UNLIKELY
19namespace llvm { class BasicBlock; }
20namespace llvm { class Module; }
21namespace llvm { class Type; }
22
23using namespace llvm;
24using namespace kernel;
25using namespace parabix;
26
27KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
28                             std::string kernelName,
29                             std::vector<Binding> stream_inputs,
30                             std::vector<Binding> stream_outputs,
31                             std::vector<Binding> scalar_parameters,
32                             std::vector<Binding> scalar_outputs,
33                             std::vector<Binding> internal_scalars)
34: KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars),
35mNoTerminateAttribute(false),
36mDoBlockUpdatesProducedItemCountsAttribute(false) {
37
38}
39
40unsigned KernelBuilder::addScalar(Type * type, const std::string & name) {
41    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
42        llvm::report_fatal_error("Cannot add kernel field " + name + " after kernel state finalized");
43    }
44    const auto index = mKernelFields.size();
45    mKernelMap.emplace(name, index);
46    mKernelFields.push_back(type);
47    return index;
48}
49
50void KernelBuilder::prepareKernel() {
51    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
52        llvm::report_fatal_error("Cannot prepare kernel after kernel state finalized");
53    }
54    unsigned blockSize = iBuilder->getBitBlockWidth();
55    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
56        std::string tmp;
57        raw_string_ostream out(tmp);
58        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
59            << mStreamSetInputs.size() << " input stream sets.";
60        throw std::runtime_error(out.str());
61    }
62    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
63        std::string tmp;
64        raw_string_ostream out(tmp);
65        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
66            << mStreamSetOutputs.size() << " output stream sets.";
67        throw std::runtime_error(out.str());
68    }
69    int streamSetNo = 0;
70    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
71        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
72             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
73        }
74        mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].name + structPtrSuffix});
75        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, streamSetNo);
76        addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + processedItemCountSuffix);
77        streamSetNo++;
78    }
79    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
80        mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].name + structPtrSuffix});
81        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, streamSetNo);
82        addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + producedItemCountSuffix);
83        streamSetNo++;
84    }
85    for (auto binding : mScalarInputs) {
86        addScalar(binding.type, binding.name);
87    }
88    for (auto binding : mScalarOutputs) {
89        addScalar(binding.type, binding.name);
90    }
91    for (auto binding : mInternalScalars) {
92        addScalar(binding.type, binding.name);
93    }
94    addScalar(iBuilder->getSizeTy(), blockNoScalar);
95    addScalar(iBuilder->getSizeTy(), logicalSegmentNoScalar);
96    addScalar(iBuilder->getInt1Ty(), terminationSignal);
97    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, mKernelName);
98}
99
100std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
101    auto saveModule = iBuilder->getModule();
102    auto savePoint = iBuilder->saveIP();
103    auto module = make_unique<Module>(mKernelName + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
104    iBuilder->setModule(module.get());
105    generateKernel(inputs, outputs);
106    iBuilder->setModule(saveModule);
107    iBuilder->restoreIP(savePoint);
108    return module;
109}
110
111void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
112    auto savePoint = iBuilder->saveIP();
113    Module * const m = iBuilder->getModule();
114    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
115    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
116    prepareKernel();            // possibly overridden by the KernelBuilder subtype
117    addKernelDeclarations(m);
118    generateInitMethod();       // possibly overridden by the KernelBuilder subtype
119    generateDoBlockMethod();    // must be implemented by the KernelBuilder subtype
120    generateFinalBlockMethod(); // possibly overridden by the KernelBuilder subtype
121    generateDoSegmentMethod();
122
123    // Implement the accumulator get functions
124    for (auto binding : mScalarOutputs) {
125        auto fnName = mKernelName + accumulator_infix + binding.name;
126        Function * accumFn = m->getFunction(fnName);
127        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, accumFn, 0));
128        Value * self = &*(accumFn->arg_begin());
129        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
130        Value * retVal = iBuilder->CreateLoad(ptr);
131        iBuilder->CreateRet(retVal);
132    }
133    iBuilder->restoreIP(savePoint);
134}
135
136// Default init method, possibly overridden if special init actions required.
137void KernelBuilder::generateInitMethod() const {
138    auto savePoint = iBuilder->saveIP();
139    Module * const m = iBuilder->getModule();
140    Function * initFunction = m->getFunction(mKernelName + init_suffix);
141    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", initFunction, 0));   
142    Function::arg_iterator args = initFunction->arg_begin();
143    Value * self = &*(args++);
144    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), self);
145    for (auto binding : mScalarInputs) {
146        Value * param = &*(args++);
147        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
148        iBuilder->CreateStore(param, ptr);
149    }
150    iBuilder->CreateRetVoid();
151    iBuilder->restoreIP(savePoint);
152}
153
154//  The default finalBlock method simply dispatches to the doBlock routine.
155void KernelBuilder::generateFinalBlockMethod() const {
156    auto savePoint = iBuilder->saveIP();
157    Module * m = iBuilder->getModule();
158    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
159    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
160    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
161    // Final Block arguments: self, remaining, then the standard DoBlock args.
162    Function::arg_iterator args = finalBlockFunction->arg_begin();
163    Value * self = &*(args++);
164    /* Skip "remaining" arg */ args++;
165    std::vector<Value *> doBlockArgs = {self};
166    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
167    iBuilder->CreateRetVoid();
168    iBuilder->restoreIP(savePoint);
169}
170
171// Note: this may be overridden to incorporate doBlock logic directly into
172// the doSegment function.
173void KernelBuilder::generateDoBlockLogic(Value * self, Value * /* blockNo */) const {
174    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
175    iBuilder->CreateCall(doBlockFunction, self);
176}
177
178
179//  The default doSegment method dispatches to the doBlock routine for
180//  each block of the given number of blocksToDo, and then updates counts.
181void KernelBuilder::generateDoSegmentMethod() const {
182    auto savePoint = iBuilder->saveIP();
183    Module * m = iBuilder->getModule();
184    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
185    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), mKernelName + "_entry", doSegmentFunction, 0));
186    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
187    BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_strideLoopCond", doSegmentFunction, 0);
188    BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_strideLoopBody", doSegmentFunction, 0);
189    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_stridesDone", doSegmentFunction, 0);
190    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_doFinalBlock", doSegmentFunction, 0);
191    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_segmentDone", doSegmentFunction, 0);
192    BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), mKernelName + "_finalExit", doSegmentFunction, 0);
193    Type * const size_ty = iBuilder->getSizeTy();
194    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
195    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
196   
197    Function::arg_iterator args = doSegmentFunction->arg_begin();
198    Value * self = &*(args++);
199    Value * doFinal = &*(args++);
200   
201    std::vector<Value *> producerPos;
202    producerPos.push_back(&*(args++));
203    Value * availablePos = producerPos[0];
204    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
205        Value * p = &*(args++);
206        producerPos.push_back(p);
207        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
208    }
209    Value * processed = getProcessedItemCount(self, mStreamSetInputs[0].name);
210    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
211    Value * stridesToDo = iBuilder->CreateUDiv(itemsAvail, stride);
212    iBuilder->CreateBr(strideLoopCond);
213
214    iBuilder->SetInsertPoint(strideLoopCond);
215    PHINode * stridesRemaining = iBuilder->CreatePHI(size_ty, 2, "stridesRemaining");
216    stridesRemaining->addIncoming(stridesToDo, entryBlock);
217    Value * notDone = iBuilder->CreateICmpUGT(stridesRemaining, ConstantInt::get(size_ty, 0));
218    iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
219
220    iBuilder->SetInsertPoint(strideLoopBody);
221    Value * blockNo = getScalarField(self, blockNoScalar);   
222
223    generateDoBlockLogic(self, blockNo);
224    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
225    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, ConstantInt::get(size_ty, 1)), strideLoopBody);
226    iBuilder->CreateBr(strideLoopCond);
227   
228    iBuilder->SetInsertPoint(stridesDone);
229    // Update counts for the full strides processed.
230    Value * segmentItemsProcessed = iBuilder->CreateMul(stridesToDo, stride);
231    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
232        Value * preProcessed = getProcessedItemCount(self, mStreamSetInputs[i].name);
233        setProcessedItemCount(self, mStreamSetInputs[i].name, iBuilder->CreateAdd(preProcessed, segmentItemsProcessed));
234    }
235    if (!mDoBlockUpdatesProducedItemCountsAttribute) {
236        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
237            Value * preProduced = getProducedItemCount(self, mStreamSetOutputs[i].name);
238            setProducedItemCount(self, mStreamSetOutputs[i].name, iBuilder->CreateAdd(preProduced, segmentItemsProcessed));
239        }
240    }
241   
242    // Now conditionally perform the final block processing depending on the doFinal parameter.
243    iBuilder->CreateCondBr(doFinal, doFinalBlock, segmentDone);
244    iBuilder->SetInsertPoint(doFinalBlock);
245
246    Value * remainingItems = iBuilder->CreateSub(producerPos[0], processed);
247    //iBuilder->CallPrintInt(mKernelName + " remainingItems", remainingItems);
248   
249    createFinalBlockCall(self, remainingItems);
250    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
251        Value * preProcessed = getProcessedItemCount(self, mStreamSetInputs[i].name);
252        setProcessedItemCount(self, mStreamSetInputs[i].name, iBuilder->CreateAdd(preProcessed, remainingItems));
253    }
254    if (!mDoBlockUpdatesProducedItemCountsAttribute) {
255        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
256            Value * preProduced = getProducedItemCount(self, mStreamSetOutputs[i].name);
257            setProducedItemCount(self, mStreamSetOutputs[i].name, iBuilder->CreateAdd(preProduced, remainingItems));
258        }
259    }
260    iBuilder->CreateBr(segmentDone);
261   
262    iBuilder->SetInsertPoint(segmentDone);
263//#ifndef NDEBUG
264//    iBuilder->CallPrintInt(mKernelName + "_processed", processed);
265//#endif
266    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
267        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
268        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
269        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
270        iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
271    }
272    iBuilder->CreateBr(finalExit);
273    iBuilder->SetInsertPoint(finalExit);
274
275    iBuilder->CreateRetVoid();
276    iBuilder->restoreIP(savePoint);
277}
278
279
280ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
281    const auto f = mKernelMap.find(name);
282    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
283        llvm::report_fatal_error("Kernel does not contain scalar: " + name);
284    }
285    return iBuilder->getInt32(f->second);
286}
287
288unsigned KernelBuilder::getScalarCount() const {
289    return mKernelFields.size();
290}
291
292Value * KernelBuilder::getScalarFieldPtr(Value * self, const std::string & fieldName) const {
293    return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
294}
295
296Value * KernelBuilder::getScalarField(Value * self, const std::string & fieldName) const {
297    return iBuilder->CreateLoad(getScalarFieldPtr(self, fieldName));
298}
299
300void KernelBuilder::setScalarField(Value * self, const std::string & fieldName, Value * newFieldVal) const {
301    iBuilder->CreateStore(newFieldVal, getScalarFieldPtr(self, fieldName));
302}
303
304LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * self) const {
305    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
306    return iBuilder->CreateAtomicLoadAcquire(ptr);
307}
308
309Value * KernelBuilder::getProcessedItemCount(Value * self, const std::string & ssName) const {
310    return getScalarField(self, ssName + processedItemCountSuffix);
311}
312
313Value * KernelBuilder::getProducedItemCount(Value * self, const std::string & ssName) const {
314    return getScalarField(self, ssName + producedItemCountSuffix);
315}
316
317Value * KernelBuilder::getTerminationSignal(Value * self) const {
318    return getScalarField(self, terminationSignal);
319}
320
321void KernelBuilder::releaseLogicalSegmentNo(Value * self, Value * newCount) const {
322    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
323    iBuilder->CreateAtomicStoreRelease(newCount, ptr);
324}
325
326void KernelBuilder::setProcessedItemCount(Value * self, const std::string & ssName, Value * newCount) const {
327    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(ssName + processedItemCountSuffix)});
328    iBuilder->CreateStore(newCount, ptr);
329}
330
331void KernelBuilder::setProducedItemCount(Value * self, const std::string & ssName, Value * newCount) const {
332    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(ssName + producedItemCountSuffix)});
333    iBuilder->CreateStore(newCount, ptr);
334}
335
336void KernelBuilder::setTerminationSignal(Value * self) const {
337    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(terminationSignal)});
338    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), ptr);
339}
340
341Value * KernelBuilder::getBlockNo(Value * self) const {
342    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
343    return iBuilder->CreateLoad(ptr);
344}
345
346void KernelBuilder::setBlockNo(Value * self, Value * value) const {
347    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
348    iBuilder->CreateStore(value, ptr);
349}
350
351
352Value * KernelBuilder::getParameter(Function * f, const std::string & paramName) const {
353    for (Function::arg_iterator argIter = f->arg_begin(), end = f->arg_end(); argIter != end; argIter++) {
354        Value * arg = &*argIter;
355        if (arg->getName() == paramName) return arg;
356    }
357    llvm::report_fatal_error("Method does not have parameter: " + paramName);
358}
359
360unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
361    const auto f = mStreamSetNameMap.find(name);
362    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
363        llvm::report_fatal_error("Kernel does not contain stream set: " + name);
364    }
365    return f->second;
366}
367
368Value * KernelBuilder::getStreamSetStructPtr(Value * self, const std::string & name) const {
369    return getScalarField(self, name + structPtrSuffix);
370}
371
372inline const StreamSetBuffer * KernelBuilder::getStreamSetBuffer(const std::string & name) const {
373    const unsigned structIdx = getStreamSetIndex(name);
374    if (structIdx < mStreamSetInputs.size()) {
375        return mStreamSetInputBuffers[structIdx];
376    } else {
377        return mStreamSetOutputBuffers[structIdx - mStreamSetInputs.size()];
378    }
379}
380
381Value * KernelBuilder::getStreamSetPtr(Value * self, const std::string & name, Value * blockNo) const {
382    return getStreamSetBuffer(name)->getStreamSetPtr(getStreamSetStructPtr(self, name), blockNo);
383}
384
385Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index) const {
386    return getStreamSetBuffer(name)->getStream(getStreamSetStructPtr(self, name), blockNo, index);
387}
388
389Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index1, Value * index2) const {
390    assert (index1->getType() == index2->getType());
391    return getStreamSetBuffer(name)->getStream(getStreamSetStructPtr(self, name), blockNo, index1, index2);
392}
393
394Value * KernelBuilder::getStreamView(Value * self, const std::string & name, Value * blockNo, Value * index) const {
395    return getStreamSetBuffer(name)->getStreamView(getStreamSetStructPtr(self, name), blockNo, index);
396}
397
398Value * KernelBuilder::getStreamView(llvm::Type * type, Value * self, const std::string & name, Value * blockNo, Value * index) const {
399    return getStreamSetBuffer(name)->getStreamView(type, getStreamSetStructPtr(self, name), blockNo, index);
400}
401
402void KernelBuilder::createInstance() {
403    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
404        llvm::report_fatal_error("Cannot create kernel instance before calling prepareKernel()");
405    }
406    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
407    Module * m = iBuilder->getModule();
408    std::vector<Value *> init_args = {mKernelInstance};
409    for (auto a : mInitialArguments) {
410        init_args.push_back(a);
411    }
412    for (auto b : mStreamSetInputBuffers) {
413        init_args.push_back(b->getStreamSetStructPtr());
414    }
415    for (auto b : mStreamSetOutputBuffers) {
416        init_args.push_back(b->getStreamSetStructPtr());
417    }
418    std::string initFnName = mKernelName + init_suffix;
419    Function * initMethod = m->getFunction(initFnName);
420    if (!initMethod) {
421        llvm::report_fatal_error("Cannot find " + initFnName);
422    }
423    iBuilder->CreateCall(initMethod, init_args);
424}
425
426Function * KernelBuilder::generateThreadFunction(const std::string & name) const {
427    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
428        llvm::report_fatal_error("Cannot generate thread function before calling prepareKernel()");
429    }
430    Module * m = iBuilder->getModule();
431    Type * const voidTy = iBuilder->getVoidTy();
432    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
433    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
434    Type * const int1ty = iBuilder->getInt1Ty();
435   
436    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
437    threadFunc->setCallingConv(CallingConv::C);
438    Function::arg_iterator args = threadFunc->arg_begin();
439   
440    Value * const arg = &*(args++);
441    arg->setName("args");
442   
443    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
444   
445    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
446   
447    std::vector<Value *> inbufProducerPtrs;
448    std::vector<Value *> inbufConsumerPtrs;
449    std::vector<Value *> outbufProducerPtrs;
450    std::vector<Value *> outbufConsumerPtrs;   
451    std::vector<Value *> endSignalPtrs;
452   
453    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
454        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
455        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
456        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(ssStructPtr));
457        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
458    }
459    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
460        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
461        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr));
462        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
463    }
464   
465    const unsigned segmentBlocks = codegen::SegmentSize;
466    const unsigned bufferSegments = codegen::BufferSegments;
467    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
468    Type * const size_ty = iBuilder->getSizeTy();
469   
470    Value * segSize = ConstantInt::get(size_ty, segmentSize);
471    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
472   
473    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
474    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
475   
476    BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
477    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
478    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
479   
480    iBuilder->CreateBr(outputCheckBlock);
481   
482    iBuilder->SetInsertPoint(outputCheckBlock);
483   
484    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
485    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
486        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(outbufProducerPtrs[i]);
487        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
488        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(outbufConsumerPtrs[i]);
489        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
490        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
491    }
492   
493    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock); 
494   
495    iBuilder->SetInsertPoint(inputCheckBlock); 
496   
497    Value * requiredSize = segSize;
498    if (mLookAheadPositions > 0) {
499        requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
500    }
501    waitCondTest = ConstantInt::get(int1ty, 1); 
502    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
503        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
504        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
505        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(inbufConsumerPtrs[i]);
506        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
507        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
508    }
509   
510    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
511   
512    iBuilder->SetInsertPoint(endSignalCheckBlock);
513   
514    LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]);
515    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
516        LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]);
517        iBuilder->CreateAnd(endSignal, endSignal_next);
518    }
519   
520    iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
521   
522    iBuilder->SetInsertPoint(doSegmentBlock);
523   
524    // needs positions
525    createDoSegmentCall({self, ConstantInt::getNullValue(iBuilder->getInt1Ty())});
526   
527    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
528        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
529        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
530    }
531   
532    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
533        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
534        iBuilder->CreateAtomicStoreRelease(produced, outbufProducerPtrs[i]);
535    }
536   
537    Value * earlyEndSignal = getTerminationSignal(self);
538    if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
539        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
540        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
541       
542        iBuilder->SetInsertPoint(earlyEndBlock);
543        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
544            Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
545            mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
546        }       
547    }
548    iBuilder->CreateBr(outputCheckBlock);
549   
550    iBuilder->SetInsertPoint(endBlock);
551    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
552    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
553    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
554   
555        // needs positions
556    createDoSegmentCall({self, ConstantInt::getAllOnesValue(iBuilder->getInt1Ty())});
557   
558   
559    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
560        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
561        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
562    }
563    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
564        iBuilder->CreateAtomicStoreRelease(producerPos, outbufProducerPtrs[i]);
565    }
566   
567    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
568        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
569        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
570    }
571   
572    iBuilder->CreatePThreadExitCall(Constant::getNullValue(voidPtrTy));
573    iBuilder->CreateRetVoid();
574   
575    return threadFunc;
576   
577}
578
579KernelBuilder::~KernelBuilder() {
580}
Note: See TracBrowser for help on using the repository browser.