source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 5257

Last change on this file since 5257 was 5257, checked in by cameron, 2 years ago

finalSegment kernel methods initial check-in

File size: 30.5 KB
RevLine 
[4924]1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
[5063]7#include <llvm/IR/Module.h>
8#include <llvm/IR/Type.h>
9#include <llvm/IR/Value.h>
10#include <llvm/Support/raw_ostream.h>
[5174]11#include <llvm/Support/ErrorHandling.h>
[5135]12#include <toolchain.h>
[4924]13
[4959]14using namespace llvm;
[5063]15using namespace kernel;
[4959]16
[5063]17KernelBuilder::KernelBuilder(IDISA::IDISA_Builder * builder,
[5246]18                             std::string kernelName,
19                             std::vector<Binding> stream_inputs,
20                             std::vector<Binding> stream_outputs,
21                             std::vector<Binding> scalar_parameters,
22                             std::vector<Binding> scalar_outputs,
23                             std::vector<Binding> internal_scalars)
[5252]24: KernelInterface(builder, kernelName, stream_inputs, stream_outputs, scalar_parameters, scalar_outputs, internal_scalars),
25mNoTerminateAttribute(false) {
[4974]26
[5227]27}
28
[5246]29unsigned KernelBuilder::addScalar(Type * type, const std::string & name) {
[5063]30    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
[5227]31        llvm::report_fatal_error("Cannot add kernel field " + name + " after kernel state finalized");
[4924]32    }
[5227]33    const auto index = mKernelFields.size();
34    mKernelMap.emplace(name, index);
35    mKernelFields.push_back(type);
36    return index;
[4924]37}
[4968]38
[5076]39void KernelBuilder::prepareKernel() {
[5246]40    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
41        llvm::report_fatal_error("Cannot prepare kernel after kernel state finalized");
42    }
[5142]43    unsigned blockSize = iBuilder->getBitBlockWidth();
[5133]44    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
[5202]45        std::string tmp;
46        raw_string_ostream out(tmp);
47        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
48            << mStreamSetInputs.size() << " input stream sets.";
[5217]49        throw std::runtime_error(out.str());
[5133]50    }
51    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
[5202]52        std::string tmp;
53        raw_string_ostream out(tmp);
54        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
55            << mStreamSetOutputs.size() << " output stream sets.";
[5217]56        throw std::runtime_error(out.str());
[5133]57    }
[5104]58    int streamSetNo = 0;
[5133]59    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
[5174]60        if ((mStreamSetInputBuffers[i]->getBufferSize() > 0) && (mStreamSetInputBuffers[i]->getBufferSize() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
[5202]61             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
[5142]62        }
[5202]63        mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getStreamSetStructPointerType(), mStreamSetInputs[i].name + structPtrSuffix});
64        mStreamSetNameMap.emplace(mStreamSetInputs[i].name, streamSetNo);
[5247]65        addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + processedItemCountSuffix);
[5104]66        streamSetNo++;
[5086]67    }
[5133]68    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
[5202]69        mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getStreamSetStructPointerType(), mStreamSetOutputs[i].name + structPtrSuffix});
70        mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, streamSetNo);
[5247]71        addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + producedItemCountSuffix);
[5104]72        streamSetNo++;
[5086]73    }
[5076]74    for (auto binding : mScalarInputs) {
[5202]75        addScalar(binding.type, binding.name);
[5076]76    }
77    for (auto binding : mScalarOutputs) {
[5202]78        addScalar(binding.type, binding.name);
[5076]79    }
80    for (auto binding : mInternalScalars) {
[5202]81        addScalar(binding.type, binding.name);
[5076]82    }
[5227]83    addScalar(iBuilder->getSizeTy(), blockNoScalar);
84    addScalar(iBuilder->getSizeTy(), logicalSegmentNoScalar);
85    addScalar(iBuilder->getInt1Ty(), terminationSignal);
[5175]86    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, mKernelName);
[4970]87}
88
[5246]89std::unique_ptr<Module> KernelBuilder::createKernelModule(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
90    auto saveModule = iBuilder->getModule();
[5202]91    auto savePoint = iBuilder->saveIP();
[5246]92    auto module = make_unique<Module>(mKernelName + "_" + iBuilder->getBitBlockTypeName(), iBuilder->getContext());
93    iBuilder->setModule(module.get());
94    generateKernel(inputs, outputs);
[5063]95    iBuilder->setModule(saveModule);
96    iBuilder->restoreIP(savePoint);
[5246]97    return module;
[4970]98}
99
[5246]100void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
[5202]101    auto savePoint = iBuilder->saveIP();
[5227]102    Module * const m = iBuilder->getModule();
[5246]103    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
104    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
105    prepareKernel();            // possibly overridden by the KernelBuilder subtype
[5227]106    addKernelDeclarations(m);
[5250]107    generateInitMethod();       // possibly overridden by the KernelBuilder subtype
[5246]108    generateDoBlockMethod();    // must be implemented by the KernelBuilder subtype
109    generateFinalBlockMethod(); // possibly overridden by the KernelBuilder subtype
[5086]110    generateDoSegmentMethod();
[5257]111    generateFinalSegmentMethod();
[5074]112
[5063]113    // Implement the accumulator get functions
114    for (auto binding : mScalarOutputs) {
[5202]115        auto fnName = mKernelName + accumulator_infix + binding.name;
[5063]116        Function * accumFn = m->getFunction(fnName);
[5202]117        iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, accumFn, 0));
[5063]118        Value * self = &*(accumFn->arg_begin());
[5202]119        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
[5063]120        Value * retVal = iBuilder->CreateLoad(ptr);
121        iBuilder->CreateRet(retVal);
[4995]122    }
[5250]123    generateInitMethod();
124    iBuilder->restoreIP(savePoint);
125}
[5246]126
[5250]127// Default init method, possibly overridden if special init actions required.
128void KernelBuilder::generateInitMethod() const {
129    auto savePoint = iBuilder->saveIP();
130    Module * const m = iBuilder->getModule();
[5063]131    Function * initFunction = m->getFunction(mKernelName + init_suffix);
[5246]132    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "Init_entry", initFunction, 0));   
[5063]133    Function::arg_iterator args = initFunction->arg_begin();
[5051]134    Value * self = &*(args++);
[5250]135    iBuilder->CreateStore(ConstantAggregateZero::get(mKernelStateType), self);
[5063]136    for (auto binding : mScalarInputs) {
[5246]137        Value * param = &*(args++);
[5202]138        Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
[5246]139        iBuilder->CreateStore(param, ptr);
[5051]140    }
141    iBuilder->CreateRetVoid();
[5063]142    iBuilder->restoreIP(savePoint);
[5051]143}
144
[5074]145//  The default finalBlock method simply dispatches to the doBlock routine.
[5246]146void KernelBuilder::generateFinalBlockMethod() const {
[5202]147    auto savePoint = iBuilder->saveIP();
[5074]148    Module * m = iBuilder->getModule();
[5063]149    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
150    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
151    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
152    // Final Block arguments: self, remaining, then the standard DoBlock args.
153    Function::arg_iterator args = finalBlockFunction->arg_begin();
154    Value * self = &*(args++);
155    /* Skip "remaining" arg */ args++;
156    std::vector<Value *> doBlockArgs = {self};
157    while (args != finalBlockFunction->arg_end()){
158        doBlockArgs.push_back(&*args++);
159    }
[5115]160    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
[5111]161    iBuilder->CreateRetVoid();
[5063]162    iBuilder->restoreIP(savePoint);
[4986]163}
[4924]164
[5185]165// Note: this may be overridden to incorporate doBlock logic directly into
166// the doSegment function.
[5246]167void KernelBuilder::generateDoBlockLogic(Value * self, Value * /* blockNo */) const {
[5174]168    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
[5246]169    iBuilder->CreateCall(doBlockFunction, self);
[5174]170}
171
[5257]172
[5174]173//  The default doSegment method dispatches to the doBlock routine for
174//  each block of the given number of blocksToDo, and then updates counts.
[5246]175void KernelBuilder::generateDoSegmentMethod() const {
[5202]176    auto savePoint = iBuilder->saveIP();
[5086]177    Module * m = iBuilder->getModule();
178    Function * doSegmentFunction = m->getFunction(mKernelName + doSegment_suffix);
179    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doSegmentFunction, 0));
180    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
[5194]181    BasicBlock * strideLoopCond = BasicBlock::Create(iBuilder->getContext(), "strideLoopCond", doSegmentFunction, 0);
182    BasicBlock * strideLoopBody = BasicBlock::Create(iBuilder->getContext(), "strideLoopBody", doSegmentFunction, 0);
183    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", doSegmentFunction, 0);
[5188]184    BasicBlock * segmentDone = BasicBlock::Create(iBuilder->getContext(), "segmentDone", doSegmentFunction, 0);
[5194]185    BasicBlock * finalExit = BasicBlock::Create(iBuilder->getContext(), "finalExit", doSegmentFunction, 0);
[5165]186    Type * const size_ty = iBuilder->getSizeTy();
[5183]187    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
[5174]188    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
[5086]189   
190    Function::arg_iterator args = doSegmentFunction->arg_begin();
191    Value * self = &*(args++);
192    Value * blocksToDo = &*(args);
[5188]193   
[5183]194    std::vector<Value *> inbufProducerPtrs;
[5188]195    std::vector<Value *> endSignalPtrs;
[5183]196    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
[5246]197        Value * param = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
198        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(param));
199        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(param));
[5183]200    }
201   
[5188]202    std::vector<Value *> producerPos;
[5183]203    /* Determine the actually available data examining all input stream sets. */
[5192]204    LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[0]);
[5188]205    producerPos.push_back(p);
206    Value * availablePos = producerPos[0];
[5183]207    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
[5192]208        LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
[5188]209        producerPos.push_back(p);
[5183]210        /* Set the available position to be the minimum of availablePos and producerPos. */
[5188]211        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
[5183]212    }
[5247]213    Value * processed = getProcessedItemCount(self, mStreamSetInputs[0].name);
[5183]214    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
[5257]215#ifndef NDEBUG
216    iBuilder->CallPrintInt(mKernelName + "_itemsAvail", itemsAvail);
217#endif
[5194]218    Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
219    Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
[5183]220    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
[5194]221    Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo);
222    stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo);
223    //iBuilder->CallPrintInt(mKernelName + "_stridesAvail", stridesAvail);
224    iBuilder->CreateBr(strideLoopCond);
[5165]225
[5194]226    iBuilder->SetInsertPoint(strideLoopCond);
227    PHINode * stridesRemaining = iBuilder->CreatePHI(size_ty, 2, "stridesRemaining");
228    stridesRemaining->addIncoming(stridesToDo, entryBlock);
229    Value * notDone = iBuilder->CreateICmpUGT(stridesRemaining, ConstantInt::get(size_ty, 0));
230    iBuilder->CreateCondBr(notDone, strideLoopBody, stridesDone);
[5165]231
[5194]232    iBuilder->SetInsertPoint(strideLoopBody);
[5165]233    Value * blockNo = getScalarField(self, blockNoScalar);   
[5185]234
[5174]235    generateDoBlockLogic(self, blockNo);
236    setBlockNo(self, iBuilder->CreateAdd(blockNo, strideBlocks));
[5194]237    stridesRemaining->addIncoming(iBuilder->CreateSub(stridesRemaining, ConstantInt::get(size_ty, 1)), strideLoopBody);
238    iBuilder->CreateBr(strideLoopCond);
[5111]239   
[5194]240    iBuilder->SetInsertPoint(stridesDone);
241    processed = iBuilder->CreateAdd(processed, iBuilder->CreateMul(stridesToDo, stride));
[5247]242    setProcessedItemCount(self, mStreamSetInputs[0].name, processed);
[5257]243    iBuilder->CreateBr(segmentDone);
244    iBuilder->SetInsertPoint(segmentDone);
245#ifndef NDEBUG
246    iBuilder->CallPrintInt(mKernelName + "_processed", processed);
247#endif
248    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
249        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
250        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
251        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
252        iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
253    }
254    iBuilder->CreateBr(finalExit);
255    iBuilder->SetInsertPoint(finalExit);
256
257    iBuilder->CreateRetVoid();
258    iBuilder->restoreIP(savePoint);
259}
260
261void KernelBuilder::generateFinalSegmentMethod() const {
262    auto savePoint = iBuilder->saveIP();
263    Module * m = iBuilder->getModule();
264    Function * finalSegmentFunction = m->getFunction(mKernelName + finalSegment_suffix);
265    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", finalSegmentFunction, 0));
266    BasicBlock * doStrides = BasicBlock::Create(iBuilder->getContext(), "doStrides", finalSegmentFunction, 0);
267    BasicBlock * stridesDone = BasicBlock::Create(iBuilder->getContext(), "stridesDone", finalSegmentFunction, 0);
268    Type * const size_ty = iBuilder->getSizeTy();
269    Constant * stride = ConstantInt::get(size_ty, iBuilder->getStride());
270    Value * strideBlocks = ConstantInt::get(size_ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
271    Function::arg_iterator args = finalSegmentFunction->arg_begin();
272    Value * self = &*(args++);
273    Value * blocksToDo = &*(args);
274    std::vector<Value *> inbufProducerPtrs;
275    std::vector<Value *> endSignalPtrs;
276    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
277        Value * param = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
278        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(param));
279        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(param));
280    }
[5188]281   
[5257]282    std::vector<Value *> producerPos;
283    /* Determine the actually available data examining all input stream sets. */
284    LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[0]);
285    producerPos.push_back(p);
286    Value * availablePos = producerPos[0];
287    for (unsigned i = 1; i < inbufProducerPtrs.size(); i++) {
288        LoadInst * p = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
289        producerPos.push_back(p);
290        /* Set the available position to be the minimum of availablePos and producerPos. */
291        availablePos = iBuilder->CreateSelect(iBuilder->CreateICmpULT(availablePos, p), availablePos, p);
[5188]292    }
[5257]293    Value * processed = getProcessedItemCount(self, mStreamSetInputs[0].name);
294    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
295#ifndef NDEBUG
296    iBuilder->CallPrintInt(mKernelName + "_itemsAvail final", itemsAvail);
297#endif
298    Value * stridesToDo = iBuilder->CreateUDiv(blocksToDo, strideBlocks);
299    Value * stridesAvail = iBuilder->CreateUDiv(itemsAvail, stride);
300    /* Adjust the number of full blocks to do, based on the available data, if necessary. */
301    Value * lessThanFullSegment = iBuilder->CreateICmpULT(stridesAvail, stridesToDo);
302    stridesToDo = iBuilder->CreateSelect(lessThanFullSegment, stridesAvail, stridesToDo);
303    Value * notDone = iBuilder->CreateICmpUGT(stridesToDo, ConstantInt::get(size_ty, 0));
304    iBuilder->CreateCondBr(notDone, doStrides, stridesDone);
305   
306    iBuilder->SetInsertPoint(doStrides);
307    createDoSegmentCall(self, blocksToDo);
308    iBuilder->CreateBr(stridesDone);
[5188]309   
[5257]310    iBuilder->SetInsertPoint(stridesDone);
311    /* Now at most a partial block remains. */
[5188]312   
[5257]313    processed = getProcessedItemCount(self, mStreamSetInputs[0].name);   
314    Value * remainingItems = iBuilder->CreateSub(producerPos[0], processed);
315    //iBuilder->CallPrintInt(mKernelName + " remainingItems", remainingItems);
316       
[5188]317    createFinalBlockCall(self, remainingItems);
[5257]318    processed = iBuilder->CreateAdd(processed, remainingItems);
319    setProcessedItemCount(self, mStreamSetInputs[0].name, processed);
320       
321#ifndef NDEBUG
322    iBuilder->CallPrintInt(mKernelName + "_processed final", processed);
323#endif
[5188]324    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
[5247]325        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
[5202]326        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
[5185]327        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
[5192]328        iBuilder->CreateAtomicStoreRelease(produced, producerPosPtr);
[5183]329    }
[5174]330
[5111]331    iBuilder->CreateRetVoid();
[5257]332
[5086]333    iBuilder->restoreIP(savePoint);
334}
335
[5257]336
337
[5227]338ConstantInt * KernelBuilder::getScalarIndex(const std::string & name) const {
339    const auto f = mKernelMap.find(name);
340    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
[5246]341        llvm::report_fatal_error("Kernel does not contain scalar: " + name);
[5000]342    }
[5104]343    return iBuilder->getInt32(f->second);
[4959]344}
[4924]345
[5227]346unsigned KernelBuilder::getScalarCount() const {
347    return mKernelFields.size();
348}
349
[5246]350Value * KernelBuilder::getScalarFieldPtr(Value * self, const std::string & fieldName) const {
[5202]351    return iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(fieldName)});
352}
[5109]353
[5246]354Value * KernelBuilder::getScalarField(Value * self, const std::string & fieldName) const {
[5202]355    return iBuilder->CreateLoad(getScalarFieldPtr(self, fieldName));
[4924]356}
357
[5246]358void KernelBuilder::setScalarField(Value * self, const std::string & fieldName, Value * newFieldVal) const {
[5202]359    iBuilder->CreateStore(newFieldVal, getScalarFieldPtr(self, fieldName));
[5008]360}
[5063]361
[5246]362LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * self) const {
[5174]363    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
[5246]364    return iBuilder->CreateAtomicLoadAcquire(ptr);
[5174]365}
366
[5247]367Value * KernelBuilder::getProcessedItemCount(Value * self, const std::string & ssName) const {
368    return getScalarField(self, ssName + processedItemCountSuffix);
[5174]369}
370
[5247]371Value * KernelBuilder::getProducedItemCount(Value * self, const std::string & ssName) const {
372    return getScalarField(self, ssName + producedItemCountSuffix);
[5174]373}
374
[5246]375Value * KernelBuilder::getTerminationSignal(Value * self) const {
[5194]376    return getScalarField(self, terminationSignal);
[5174]377}
378
[5246]379void KernelBuilder::releaseLogicalSegmentNo(Value * self, Value * newCount) const {
[5174]380    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(logicalSegmentNoScalar)});
[5192]381    iBuilder->CreateAtomicStoreRelease(newCount, ptr);
[5174]382}
383
[5247]384void KernelBuilder::setProcessedItemCount(Value * self, const std::string & ssName, Value * newCount) const {
385    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(ssName + processedItemCountSuffix)});
[5174]386    iBuilder->CreateStore(newCount, ptr);
387}
388
[5247]389void KernelBuilder::setProducedItemCount(Value * self, const std::string & ssName, Value * newCount) const {
390    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(ssName + producedItemCountSuffix)});
[5174]391    iBuilder->CreateStore(newCount, ptr);
392}
393
[5246]394void KernelBuilder::setTerminationSignal(Value * self) const {
[5194]395    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(terminationSignal)});
396    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt1Ty(), 1), ptr);
[5174]397}
398
[5246]399Value * KernelBuilder::getBlockNo(Value * self) const {
[5165]400    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
[5246]401    return iBuilder->CreateLoad(ptr);
[5165]402}
[5063]403
[5246]404void KernelBuilder::setBlockNo(Value * self, Value * newFieldVal) const {
[5165]405    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(blockNoScalar)});
[5174]406    iBuilder->CreateStore(newFieldVal, ptr);
[5165]407}
408
409
[5246]410Value * KernelBuilder::getParameter(Function * f, const std::string & paramName) const {
[5063]411    for (Function::arg_iterator argIter = f->arg_begin(), end = f->arg_end(); argIter != end; argIter++) {
412        Value * arg = &*argIter;
413        if (arg->getName() == paramName) return arg;
[5051]414    }
[5174]415    llvm::report_fatal_error("Method does not have parameter: " + paramName);
[5051]416}
[5008]417
[5246]418unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
[5202]419    const auto f = mStreamSetNameMap.find(name);
[5104]420    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
[5202]421        llvm::report_fatal_error("Kernel does not contain stream set: " + name);
[5104]422    }
423    return f->second;
424}
[5063]425
[5246]426size_t KernelBuilder::getStreamSetBufferSize(Value * /* self */, const std::string & name) const {
[5202]427    const unsigned index = getStreamSetIndex(name);
428    StreamSetBuffer * buf = nullptr;
429    if (index < mStreamSetInputs.size()) {
430        buf = mStreamSetInputBuffers[index];
431    } else {
432        buf = mStreamSetOutputBuffers[index - mStreamSetInputs.size()];
[5109]433    }
[5202]434    return buf->getBufferSize();
[5109]435}
436
[5246]437Value * KernelBuilder::getStreamSetStructPtr(Value * self, const std::string & name) const {
[5202]438    return getScalarField(self, name + structPtrSuffix);
[5104]439}
440
[5246]441Value * KernelBuilder::getStreamSetBlockPtr(Value * self, const std::string &name, Value * blockNo) const {
[5202]442    Value * const structPtr = getStreamSetStructPtr(self, name);
443    const unsigned index = getStreamSetIndex(name);
444    StreamSetBuffer * buf = nullptr;
445    if (index < mStreamSetInputs.size()) {
446        buf = mStreamSetInputBuffers[index];
447    } else {
448        buf = mStreamSetOutputBuffers[index - mStreamSetInputs.size()];
449    }   
450    return buf->getStreamSetBlockPointer(structPtr, blockNo);
[5104]451}
452
[5246]453Value * KernelBuilder::getStream(Value * self, const std::string & name, Value * blockNo, Value * index) {
454    return iBuilder->CreateGEP(getStreamSetBlockPtr(self, name, blockNo), {iBuilder->getInt32(0), index});
455}
456
[5220]457void KernelBuilder::createInstance() {
[5246]458    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
459        llvm::report_fatal_error("Cannot create kernel instance before calling prepareKernel()");
460    }
[5220]461    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
[5133]462    Module * m = iBuilder->getModule();
[5220]463    std::vector<Value *> init_args = {mKernelInstance};
464    for (auto a : mInitialArguments) {
[5133]465        init_args.push_back(a);
466    }
[5202]467    for (auto b : mStreamSetInputBuffers) {
[5135]468        init_args.push_back(b->getStreamSetStructPtr());
[5133]469    }
[5202]470    for (auto b : mStreamSetOutputBuffers) {
[5135]471        init_args.push_back(b->getStreamSetStructPtr());
[5133]472    }
473    std::string initFnName = mKernelName + init_suffix;
474    Function * initMethod = m->getFunction(initFnName);
475    if (!initMethod) {
[5174]476        llvm::report_fatal_error("Cannot find " + initFnName);
[5133]477    }
478    iBuilder->CreateCall(initMethod, init_args);
479}
[5104]480
[5246]481Function * KernelBuilder::generateThreadFunction(const std::string & name) const {
482    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
483        llvm::report_fatal_error("Cannot generate thread function before calling prepareKernel()");
484    }
[5135]485    Module * m = iBuilder->getModule();
[5230]486    Type * const voidTy = iBuilder->getVoidTy();
[5227]487    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
[5135]488    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
489    Type * const int1ty = iBuilder->getInt1Ty();
[5104]490
[5135]491    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
492    threadFunc->setCallingConv(CallingConv::C);
493    Function::arg_iterator args = threadFunc->arg_begin();
[5104]494
[5135]495    Value * const arg = &*(args++);
496    arg->setName("args");
[5133]497
[5135]498    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc,0));
499
500    Value * self = iBuilder->CreateBitCast(arg, PointerType::get(mKernelStateType, 0));
501
502    std::vector<Value *> inbufProducerPtrs;
503    std::vector<Value *> inbufConsumerPtrs;
504    std::vector<Value *> outbufProducerPtrs;
505    std::vector<Value *> outbufConsumerPtrs;   
506    std::vector<Value *> endSignalPtrs;
507
508    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
[5202]509        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetInputs[i].name);
[5185]510        inbufProducerPtrs.push_back(mStreamSetInputBuffers[i]->getProducerPosPtr(ssStructPtr));
511        inbufConsumerPtrs.push_back(mStreamSetInputBuffers[i]->getConsumerPosPtr(ssStructPtr));
[5217]512        endSignalPtrs.push_back(mStreamSetInputBuffers[i]->getEndOfInputPtr(ssStructPtr));
[5135]513    }
514    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
[5202]515        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
[5185]516        outbufProducerPtrs.push_back(mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr));
517        outbufConsumerPtrs.push_back(mStreamSetOutputBuffers[i]->getConsumerPosPtr(ssStructPtr));
[5135]518    }
519
520    const unsigned segmentBlocks = codegen::SegmentSize;
521    const unsigned bufferSegments = codegen::BufferSegments;
522    const unsigned segmentSize = segmentBlocks * iBuilder->getBitBlockWidth();
523    Type * const size_ty = iBuilder->getSizeTy();
524
525    Value * segSize = ConstantInt::get(size_ty, segmentSize);
526    Value * bufferSize = ConstantInt::get(size_ty, segmentSize * (bufferSegments - 1));
527    Value * segBlocks = ConstantInt::get(size_ty, segmentBlocks);
528   
529    BasicBlock * outputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "outputCheck", threadFunc, 0);
530    BasicBlock * inputCheckBlock = BasicBlock::Create(iBuilder->getContext(), "inputCheck", threadFunc, 0);
531   
532    BasicBlock * endSignalCheckBlock = BasicBlock::Create(iBuilder->getContext(), "endSignalCheck", threadFunc, 0);
533    BasicBlock * doSegmentBlock = BasicBlock::Create(iBuilder->getContext(), "doSegment", threadFunc, 0);
534    BasicBlock * endBlock = BasicBlock::Create(iBuilder->getContext(), "end", threadFunc, 0);
535    BasicBlock * doFinalSegBlock = BasicBlock::Create(iBuilder->getContext(), "doFinalSeg", threadFunc, 0);
536    BasicBlock * doFinalBlock = BasicBlock::Create(iBuilder->getContext(), "doFinal", threadFunc, 0);
537
538    iBuilder->CreateBr(outputCheckBlock);
539
540    iBuilder->SetInsertPoint(outputCheckBlock);
541
542    Value * waitCondTest = ConstantInt::get(int1ty, 1);   
543    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
[5192]544        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(outbufProducerPtrs[i]);
[5135]545        // iBuilder->CallPrintInt(name + ":output producerPos", producerPos);
[5192]546        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(outbufConsumerPtrs[i]);
[5135]547        // iBuilder->CallPrintInt(name + ":output consumerPos", consumerPos);
548        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(producerPos, iBuilder->CreateAdd(consumerPos, bufferSize)));
549    }
550   
551    iBuilder->CreateCondBr(waitCondTest, inputCheckBlock, outputCheckBlock); 
552
553    iBuilder->SetInsertPoint(inputCheckBlock); 
554
[5174]555    Value * requiredSize = segSize;
556    if (mLookAheadPositions > 0) {
557        requiredSize = iBuilder->CreateAdd(segSize, ConstantInt::get(size_ty, mLookAheadPositions));
558    }
[5135]559    waitCondTest = ConstantInt::get(int1ty, 1); 
560    for (unsigned i = 0; i < inbufProducerPtrs.size(); i++) {
[5192]561        LoadInst * producerPos = iBuilder->CreateAtomicLoadAcquire(inbufProducerPtrs[i]);
[5135]562        // iBuilder->CallPrintInt(name + ":input producerPos", producerPos);
[5192]563        LoadInst * consumerPos = iBuilder->CreateAtomicLoadAcquire(inbufConsumerPtrs[i]);
[5135]564        // iBuilder->CallPrintInt(name + ":input consumerPos", consumerPos);
[5174]565        waitCondTest = iBuilder->CreateAnd(waitCondTest, iBuilder->CreateICmpULE(iBuilder->CreateAdd(consumerPos, requiredSize), producerPos));
[5135]566    }
567
568    iBuilder->CreateCondBr(waitCondTest, doSegmentBlock, endSignalCheckBlock);
569   
570    iBuilder->SetInsertPoint(endSignalCheckBlock);
571   
[5188]572    LoadInst * endSignal = iBuilder->CreateLoad(endSignalPtrs[0]);
[5135]573    for (unsigned i = 1; i < endSignalPtrs.size(); i++){
[5188]574        LoadInst * endSignal_next = iBuilder->CreateLoad(endSignalPtrs[i]);
[5135]575        iBuilder->CreateAnd(endSignal, endSignal_next);
576    }
577       
[5188]578    iBuilder->CreateCondBr(endSignal, endBlock, inputCheckBlock);
[5135]579   
580    iBuilder->SetInsertPoint(doSegmentBlock);
581 
582    createDoSegmentCall(self, segBlocks);
583
584    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
585        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), segSize);
[5192]586        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
[5135]587    }
[5174]588   
[5135]589    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
[5247]590        Value * produced = getProducedItemCount(self, mStreamSetOutputs[i].name);
[5192]591        iBuilder->CreateAtomicStoreRelease(produced, outbufProducerPtrs[i]);
[5135]592    }
593   
[5174]594    Value * earlyEndSignal = getTerminationSignal(self);
595    if (earlyEndSignal != ConstantInt::getNullValue(iBuilder->getInt1Ty())) {
596        BasicBlock * earlyEndBlock = BasicBlock::Create(iBuilder->getContext(), "earlyEndSignal", threadFunc, 0);
597        iBuilder->CreateCondBr(earlyEndSignal, earlyEndBlock, outputCheckBlock);
598
599        iBuilder->SetInsertPoint(earlyEndBlock);
600        for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
[5202]601            Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
[5185]602            mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
[5174]603        }       
604    }
[5135]605    iBuilder->CreateBr(outputCheckBlock);
606     
607    iBuilder->SetInsertPoint(endBlock);
608    LoadInst * producerPos = iBuilder->CreateLoad(inbufProducerPtrs[0]);
609    LoadInst * consumerPos = iBuilder->CreateLoad(inbufConsumerPtrs[0]);
610    Value * remainingBytes = iBuilder->CreateSub(producerPos, consumerPos);
611    Value * blockSize = ConstantInt::get(size_ty, iBuilder->getBitBlockWidth());
612    Value * blocks = iBuilder->CreateUDiv(remainingBytes, blockSize);
613    Value * finalBlockRemainingBytes = iBuilder->CreateURem(remainingBytes, blockSize);
614
615    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(blocks, ConstantInt::get(size_ty, 0)), doFinalBlock, doFinalSegBlock);
616
617    iBuilder->SetInsertPoint(doFinalSegBlock);
618
619    createDoSegmentCall(self, blocks);
620
621    iBuilder->CreateBr(doFinalBlock);
622
623    iBuilder->SetInsertPoint(doFinalBlock);
624
625    createFinalBlockCall(self, finalBlockRemainingBytes);
626
627    for (unsigned i = 0; i < inbufConsumerPtrs.size(); i++) {
628        Value * consumerPos = iBuilder->CreateAdd(iBuilder->CreateLoad(inbufConsumerPtrs[i]), remainingBytes);
[5192]629        iBuilder->CreateAtomicStoreRelease(consumerPos, inbufConsumerPtrs[i]);
[5135]630    }
631    for (unsigned i = 0; i < outbufProducerPtrs.size(); i++) {
[5192]632        iBuilder->CreateAtomicStoreRelease(producerPos, outbufProducerPtrs[i]);
[5135]633    }
634
635    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
[5202]636        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
[5185]637        mStreamSetOutputBuffers[i]->setEndOfInput(ssStructPtr);
[5135]638    }
639
[5242]640    iBuilder->CreatePThreadExitCall(Constant::getNullValue(voidPtrTy));
[5135]641    iBuilder->CreateRetVoid();
642
643    return threadFunc;
644
645}
[5246]646
647KernelBuilder::~KernelBuilder() {
648}
Note: See TracBrowser for help on using the repository browser.