source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 4981

Last change on this file since 4981 was 4974, checked in by nmedfort, 3 years ago

Added the kernel instance class; removed the original mmap file access in favour of the Boost mmap system; corrected the PrintRegister routine.

File size: 15.1 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <pablo/function.h>
8#include <IDISA/idisa_builder.h>
9#include <llvm/Support/CommandLine.h>
10#include <kernels/instance.h>
11
12using namespace llvm;
13using namespace pablo;
14
// Command-line option "segment-size" (default 1). The KernelBuilder constructor copies its value into
// mBlocksPerSegment and asserts that it is greater than zero.
static cl::opt<unsigned> SegmentSize("segment-size", cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
16
// Returns true iff exactly one bit of x is set; zero is not considered a power of two.
inline bool isPowerOfTwo(const unsigned x) {
    if (x == 0) {
        return false;
    }
    // x - 1 clears the lowest set bit of x and sets every bit below it, so the AND
    // is zero only when x had no other bit set.
    const unsigned remainingBits = x & (x - 1);
    return remainingBits == 0;
}
20
21namespace kernel {
22
23// sets name & sets internal state to the kernel superclass state
24KernelBuilder::KernelBuilder(std::string name, Module * m, IDISA::IDISA_Builder * b)
25: mMod(m)
26, iBuilder(b)
27, mKernelName(name)
28, mBitBlockType(b->getBitBlockType())
29, mBlockSize(b->getBitBlockWidth())
30, mBlocksPerSegment(SegmentSize)
31, mCircularBufferModulo(1)
32, mSegmentIndex(0)
33, mBlockIndex(0) {
34    assert (mBlocksPerSegment > 0);
35    mBlockIndex = addInternalState(b->getInt64Ty(), "BlockNo");
36}
37
// Out-of-class definition of the static Instance::mAllocator member.
// TODO: this should probably live in an "instance.cpp".
SlabAllocator<Instance> Instance::mAllocator;
39
40/** ------------------------------------------------------------------------------------------------------------- *
41 * @brief addInternalState
42 ** ------------------------------------------------------------------------------------------------------------- */
43unsigned KernelBuilder::addInternalState(Type * const type) {
44    assert (type);
45    const unsigned index = mInternalState.size();
46    mInternalState.push_back(type);
47    return index;
48}
49
50unsigned KernelBuilder::addInternalState(llvm::Type * const type, std::string && name) {
51    if (LLVM_UNLIKELY(mInternalStateNameMap.count(name) != 0)) {
52        throw std::runtime_error("Kernel already contains internal state " + name);
53    }
54    const unsigned index = addInternalState(type);
55    mInternalStateNameMap.emplace(name, index);
56    return index;
57}
58
59
60/** ------------------------------------------------------------------------------------------------------------- *
61 * @brief getInternalState
62 ** ------------------------------------------------------------------------------------------------------------- */
63Value * KernelBuilder::getInternalState(Value * const instance, const unsigned index) {
64    Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(0), iBuilder->getInt32(index)};
65    return iBuilder->CreateGEP(instance ? instance : mKernelParam, indices);
66}
67
68Value * KernelBuilder::getInternalState(Value * const instance, const std::string & name) {
69    const auto f = mInternalStateNameMap.find(name);
70    if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
71        throw std::runtime_error("Kernel does not contain internal state " + name);
72    }
73    return getInternalState(instance, f->second);
74}
75
76/** ------------------------------------------------------------------------------------------------------------- *
77 * @brief setInternalState
78 ** ------------------------------------------------------------------------------------------------------------- */
79void KernelBuilder::setInternalState(Value * const instance, const std::string & name, Value * const value) {
80    Value * ptr = getInternalState(instance, name);
81    assert (ptr->getType()->getPointerElementType() == value->getType());
82    if (value->getType() == iBuilder->getBitBlockType()) {
83        iBuilder->CreateBlockAlignedStore(value, ptr);
84    } else {
85        iBuilder->CreateStore(value, ptr);
86    }
87}
88
89void KernelBuilder::setInternalState(Value * const instance, const unsigned index, Value * const value) {
90    Value * ptr = getInternalState(instance, index);
91    assert (ptr->getType()->getPointerElementType() == value->getType());
92    if (value->getType() == iBuilder->getBitBlockType()) {
93        iBuilder->CreateBlockAlignedStore(value, ptr);
94    } else {
95        iBuilder->CreateStore(value, ptr);
96    }
97}
98
99/** ------------------------------------------------------------------------------------------------------------- *
100 * @brief addInputStream
101 ** ------------------------------------------------------------------------------------------------------------- */
102void KernelBuilder::addInputStream(const unsigned fields, std::string && name) {
103    assert (fields > 0 && !name.empty());
104    mInputStreamName.push_back(name);
105    if (fields == 1){
106        mInputStream.push_back(mBitBlockType);
107    } else {
108        mInputStream.push_back(ArrayType::get(mBitBlockType, fields));
109    }
110}
111
112void KernelBuilder::addInputStream(const unsigned fields) {
113    addInputStream(fields, std::move(mKernelName + "_inputstream_" + std::to_string(mInputStream.size())));
114}
115
116/** ------------------------------------------------------------------------------------------------------------- *
117 * @brief getInputStream
118 ** ------------------------------------------------------------------------------------------------------------- */
119Value * KernelBuilder::getInputStream(llvm::Value * const instance, const unsigned index, const unsigned streamOffset) {
120    assert (instance);
121    Value * const indices[] = {getOffset(instance, streamOffset), iBuilder->getInt32(index)};
122    return iBuilder->CreateGEP(instance, indices);
123}
124
125/** ------------------------------------------------------------------------------------------------------------- *
126 * @brief addInputScalar
127 ** ------------------------------------------------------------------------------------------------------------- */
128void KernelBuilder::addInputScalar(Type * const type, std::string && name) {
129    assert (type && !name.empty());
130    mInputScalarName.push_back(name);
131    mInputScalar.push_back(type);
132}
133
134void KernelBuilder::addInputScalar(Type * const type) {
135    addInputScalar(type, std::move(mKernelName + "_inputscalar_" + std::to_string(mInputScalar.size())));
136}
137
/** ------------------------------------------------------------------------------------------------------------- *
 * @brief getInputScalar
 *
 * Not implemented: both arguments are ignored and every call throws std::runtime_error.
 ** ------------------------------------------------------------------------------------------------------------- */
Value * KernelBuilder::getInputScalar(Value * const instance, const unsigned) {
    throw std::runtime_error("currently not supported!");
}
144
145/** ------------------------------------------------------------------------------------------------------------- *
146 * @brief addOutputStream
147 ** ------------------------------------------------------------------------------------------------------------- */
148unsigned KernelBuilder::addOutputStream(const unsigned fields) {
149    assert (fields > 0);
150    const unsigned index = mOutputStream.size();
151    mOutputStream.push_back((fields == 1) ? mBitBlockType : ArrayType::get(mBitBlockType, fields));
152    return index;
153}
154
155/** ------------------------------------------------------------------------------------------------------------- *
156 * @brief addOutputScalar
157 ** ------------------------------------------------------------------------------------------------------------- */
158unsigned KernelBuilder::addOutputScalar(Type * const type) {
159    assert (type);
160    const unsigned index = mOutputScalar.size();
161    mOutputScalar.push_back(type);
162    return index;
163}
164
165/** ------------------------------------------------------------------------------------------------------------- *
166 * @brief getOutputStream
167 ** ------------------------------------------------------------------------------------------------------------- */
168Value * KernelBuilder::getOutputStream(Value * const instance, const unsigned index, const unsigned streamOffset) {
169    assert (instance);
170    Value * const indices[] = {getOffset(instance, streamOffset), iBuilder->getInt32(1), iBuilder->getInt32(0), iBuilder->getInt32(index)};
171    return iBuilder->CreateGEP(instance, indices);
172}
173
174/** ------------------------------------------------------------------------------------------------------------- *
175 * @brief getOutputStreams
176 ** ------------------------------------------------------------------------------------------------------------- */
177Value * KernelBuilder::getOutputStreamSet(Value * const instance, const unsigned streamOffset) {
178    assert (instance);
179    Value * const indices[] = {getOffset(instance, streamOffset), iBuilder->getInt32(1)};
180    return iBuilder->CreateGEP(instance, indices);
181}
182
/** ------------------------------------------------------------------------------------------------------------- *
 * @brief getOutputScalar
 *
 * Not implemented: both arguments are ignored and every call throws std::runtime_error.
 ** ------------------------------------------------------------------------------------------------------------- */
Value * KernelBuilder::getOutputScalar(Value * const instance, const unsigned) {
    throw std::runtime_error("currently not supported!");
}
189
190/** ------------------------------------------------------------------------------------------------------------- *
191 * @brief prepareFunction
192 ** ------------------------------------------------------------------------------------------------------------- */
193Function * KernelBuilder::prepareFunction() {   
194    if (mCircularBufferModulo > 1) {
195        mBlockIndex = addInternalState(iBuilder->getInt32Ty());
196    }
197    const unsigned capacity = mBlocksPerSegment + mCircularBufferModulo - 1;
198
199    mInputStreamType = PointerType::get(StructType::get(mMod->getContext(), mInputStream), 0);
200    mInputScalarType = PointerType::get(StructType::get(mMod->getContext(), mInputScalar), 0);
201    Type * outputStreamType = ArrayType::get(StructType::get(mMod->getContext(), mOutputStream), capacity);
202    Type * outputAccumType = StructType::get(mMod->getContext(), mOutputScalar);
203    Type * internalStateType = StructType::create(mMod->getContext(), mInternalState, mKernelName);
204    mKernelStructType = StructType::create(mMod->getContext(),std::vector<Type *>({internalStateType, outputStreamType, outputAccumType}), "KernelStruct_"+ mKernelName);
205
206    FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()),
207        std::vector<Type *>({PointerType::get(mKernelStructType, 0), mInputStreamType}), false);
208
209    mFunction = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", mMod);
210    mFunction->setCallingConv(CallingConv::C);
211
212    Function::arg_iterator args = mFunction->arg_begin();
213    mKernelParam = args++;
214    mKernelParam->setName("this");
215    mInputParam = args++;
216    mInputParam->setName("input_stream");
217
218    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mFunction, 0));
219
220    mSegmentIndex = 0;
221
222    return mFunction;
223}
224
225/** ------------------------------------------------------------------------------------------------------------- *
226 * @brief finalize
227 ** ------------------------------------------------------------------------------------------------------------- */
228void KernelBuilder::finalize() {
229
230    // Finish the actual function
231    Value * startIdx = getInternalState(mBlockIndex);
232    Value * value = iBuilder->CreateBlockAlignedLoad(startIdx);
233    value = iBuilder->CreateAdd(value, ConstantInt::get(value->getType(), 1));
234    iBuilder->CreateBlockAlignedStore(value, startIdx);
235    iBuilder->CreateRetVoid();
236
237    Type * const int64Ty = iBuilder->getInt64Ty(); // TODO: should call getIntPtrTy() instead but we don't have the data layout here.
238
239    // Generate the zero initializer
240    mConstructor = cast<Function>(mMod->getOrInsertFunction(mKernelName + "_Constructor", Type::getVoidTy(mMod->getContext()), PointerType::get(mKernelStructType, 0), nullptr));
241    mConstructor->setCallingConv(CallingConv::C);
242    auto args = mConstructor->arg_begin();
243    mKernelParam = args++;
244    mKernelParam->setName("this");
245    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mConstructor, 0));
246    for (unsigned i = 0; i < mInternalState.size(); ++i) {
247        Value * const gep = getInternalState(i);
248        Type * const type = gep->getType();
249        if (type->isIntegerTy() || type->isArrayTy() || type->isVectorTy()) {
250            setInternalState(i, Constant::getNullValue(type));
251        } else {
252            Value * gep_next = iBuilder->CreateGEP(gep, iBuilder->getInt32(1));
253            Value * get_int = iBuilder->CreatePtrToInt(gep, int64Ty);
254            Value * get_next_int = iBuilder->CreatePtrToInt(gep_next, int64Ty);
255            Value * state_size = iBuilder->CreateSub(get_next_int, get_int);
256            iBuilder->CreateMemSet(gep, iBuilder->getInt8(0), state_size, 4);
257        }
258    }
259    iBuilder->CreateRetVoid();
260
261    iBuilder->ClearInsertionPoint();
262
263    mSegmentIndex = 0;
264}
265
266/** ------------------------------------------------------------------------------------------------------------- *
267 * @brief instantiate
268 *
269 * Generate a new instance of this kernel and call the default constructor to initialize it
270 ** ------------------------------------------------------------------------------------------------------------- */
271Instance * KernelBuilder::instantiate() {
272    AllocaInst * const memory = iBuilder->CreateAlloca(mKernelStructType);
273    iBuilder->CreateCall(mConstructor, memory);
274    return new Instance(this, memory);
275}
276
277/** ------------------------------------------------------------------------------------------------------------- *
278 * @brief call
279 ** ------------------------------------------------------------------------------------------------------------- */
280void KernelBuilder::call(llvm::Value * const instance, Value * inputStreams) {
281    assert (mFunction && instance && inputStreams);
282    iBuilder->CreateCall2(mFunction, instance, iBuilder->CreatePointerCast(inputStreams, mInputStreamType));
283}
284
285/** ------------------------------------------------------------------------------------------------------------- *
286 * @brief offset
287 *
288 * Compute the stream index of the given offset value.
289 ** ------------------------------------------------------------------------------------------------------------- */
290Value * KernelBuilder::getOffset(Value * const instance, const unsigned value) {
291    const unsigned adjustedOffset = (mSegmentIndex + value);
292    if (mBlockIndex) {
293        Value * offset = iBuilder->CreateBlockAlignedLoad(getBlockNo(instance));
294        if (adjustedOffset) {
295            offset = iBuilder->CreateAdd(offset, ConstantInt::get(offset->getType(), adjustedOffset));
296        }
297        const unsigned bufferSize = (mBlocksPerSegment + mCircularBufferModulo - 1); assert (bufferSize > 1);
298        if (isPowerOfTwo(bufferSize)) {
299            offset = iBuilder->CreateAnd(offset, ConstantInt::get(offset->getType(), bufferSize - 1));
300        } else {
301            offset = iBuilder->CreateURem(offset, ConstantInt::get(offset->getType(), bufferSize));
302        }
303        // TODO: generate branch / phi node when it's sufficiently unlikely that we'll wrap around.
304        return offset;
305    } else {
306        return iBuilder->getInt32(adjustedOffset);
307    }
308}
309
310/** ------------------------------------------------------------------------------------------------------------- *
311 * @brief setLongestLookaheadAmount
312 ** ------------------------------------------------------------------------------------------------------------- */
313void KernelBuilder::setLongestLookaheadAmount(const unsigned bits) {
314    const unsigned blockWidth = iBuilder->getBitBlockWidth();
315    const unsigned lookaheadBlocks = (bits + blockWidth - 1) / blockWidth;
316    mCircularBufferModulo = (lookaheadBlocks + 1);
317}
318
319} // end of namespace kernel
Note: See TracBrowser for help on using the repository browser.