source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 4970

Last change on this file since 4970 was 4970, checked in by nmedfort, 4 years ago

Added ability to name internal state types; removed unnecessary predefined states. Some progress towards supporting segment size > 1

File size: 14.5 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <pablo/function.h>
8#include <IDISA/idisa_builder.h>
9#include <llvm/Support/CommandLine.h>
10
11using namespace llvm;
12using namespace pablo;
13
14static cl::opt<unsigned> SegmentSize("segment-size", cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
15
// Returns true iff exactly one bit of x is set; zero is not considered a power of two.
inline bool isPowerOfTwo(const unsigned x) {
    if (x == 0) {
        return false;
    }
    // Clearing the lowest set bit leaves zero only when there was a single set bit.
    return (x & (x - 1)) == 0;
}
19
20// sets name & sets internal state to the kernel superclass state
21KernelBuilder::KernelBuilder(std::string name, Module * m, IDISA::IDISA_Builder * b)
22: mMod(m)
23, iBuilder(b)
24, mKernelName(name)
25, mBitBlockType(b->getBitBlockType())
26, mBlockSize(b->getBitBlockWidth())
27, mBlocksPerSegment(SegmentSize)
28, mCircularBufferModulo(1)
29, mSegmentIndex(0)
30, mBlockIndex(0) {
31    assert (mBlocksPerSegment > 0);
32    mBlockIndex = addInternalState(b->getInt64Ty(), "BlockNo");
33}
34
35/** ------------------------------------------------------------------------------------------------------------- *
36 * @brief addInternalState
37 ** ------------------------------------------------------------------------------------------------------------- */
38unsigned KernelBuilder::addInternalState(Type * const type) {
39    assert (type);
40    const unsigned index = mStates.size();
41    mStates.push_back(type);
42    return index;
43}
44
45unsigned KernelBuilder::addInternalState(llvm::Type * const type, std::string name) {
46    if (LLVM_UNLIKELY(mStateNameMap.count(name) != 0)) {
47        throw std::runtime_error("Kernel already contains internal state " + name);
48    }
49    const unsigned index = addInternalState(type);
50    mStateNameMap.emplace(name, index);
51    return index;
52}
53
54/** ------------------------------------------------------------------------------------------------------------- *
55 * @brief addOutputStream
56 ** ------------------------------------------------------------------------------------------------------------- */
57void KernelBuilder::addOutputStream(const unsigned fields) {
58    assert (fields > 0);
59    mOutputStreams.push_back((fields == 1) ? mBitBlockType : ArrayType::get(mBitBlockType, fields));
60}
61
62/** ------------------------------------------------------------------------------------------------------------- *
63 * @brief addOutputScalar
64 ** ------------------------------------------------------------------------------------------------------------- */
65void KernelBuilder::addOutputScalar(Type * const type) {
66    assert (type);
67    mOutputScalar.push_back(type);
68}
69
70/** ------------------------------------------------------------------------------------------------------------- *
71 * @brief addInputStream
72 ** ------------------------------------------------------------------------------------------------------------- */
73void KernelBuilder::addInputStream(const unsigned fields, std::string name) {
74    assert (fields > 0 && !name.empty());
75    mInputStreamNames.push_back(name);
76    if (fields == 1){
77        mInputStreams.push_back(mBitBlockType);
78    } else {
79        mInputStreams.push_back(ArrayType::get(mBitBlockType, fields));
80    }
81}
82
83void KernelBuilder::addInputStream(const unsigned fields) {
84    addInputStream(fields, std::move(mKernelName + "_inputstream_" + std::to_string(mInputStreams.size())));
85}
86
87
88/** ------------------------------------------------------------------------------------------------------------- *
89 * @brief getInputStream
90 ** ------------------------------------------------------------------------------------------------------------- */
91Value * KernelBuilder::getInputStream(const unsigned index, const unsigned streamOffset) {
92    Value * const indices[] = {getOffset(streamOffset), iBuilder->getInt32(index)};
93    return iBuilder->CreateGEP(mInputParam, indices);
94}
95
96/** ------------------------------------------------------------------------------------------------------------- *
97 * @brief getInputScalar
98 ** ------------------------------------------------------------------------------------------------------------- */
99Value * KernelBuilder::getInputScalar(const unsigned) {
100    throw std::runtime_error("currently not supported!");
101}
102
103/** ------------------------------------------------------------------------------------------------------------- *
104 * @brief getOutputStream
105 ** ------------------------------------------------------------------------------------------------------------- */
106Value * KernelBuilder::getOutputStream(const unsigned index, const unsigned streamOffset) {
107    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(1), getOffset(streamOffset), iBuilder->getInt32(index)};
108    return iBuilder->CreateGEP(mKernelParam, indices);
109}
110
111/** ------------------------------------------------------------------------------------------------------------- *
112 * @brief getOutputScalar
113 ** ------------------------------------------------------------------------------------------------------------- */
114Value * KernelBuilder::getOutputScalar(const unsigned) {
115    throw std::runtime_error("currently not supported!");
116}
117
118/** ------------------------------------------------------------------------------------------------------------- *
119 * @brief getInternalState
120 ** ------------------------------------------------------------------------------------------------------------- */
121Value * KernelBuilder::getInternalState(const unsigned index, Value * const inputStruct) {
122    Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(0), iBuilder->getInt32(index)};
123    return iBuilder->CreateGEP(inputStruct ? inputStruct : mKernelParam, indices);
124}
125
126Value * KernelBuilder::getInternalState(const std::string & name, Value * const inputStruct) {
127    const auto f = mStateNameMap.find(name);
128    if (LLVM_UNLIKELY(f == mStateNameMap.end())) {
129        throw std::runtime_error("Kernel does not contain internal state " + name);
130    }
131    return getInternalState(f->second, inputStruct);
132}
133
134/** ------------------------------------------------------------------------------------------------------------- *
135 * @brief setInternalState
136 ** ------------------------------------------------------------------------------------------------------------- */
137void KernelBuilder::setInternalState(const unsigned index, Value * const value) {
138    Value * ptr = getInternalState(index);
139    assert (ptr->getType()->getPointerElementType() == value->getType());
140    if (value->getType() == iBuilder->getBitBlockType()) {
141        iBuilder->CreateBlockAlignedStore(value, ptr);
142    } else {
143        iBuilder->CreateStore(value, ptr);
144    }
145}
146
147/** ------------------------------------------------------------------------------------------------------------- *
148 * @brief addInputScalar
149 ** ------------------------------------------------------------------------------------------------------------- */
150void KernelBuilder::addInputScalar(Type * const type, std::string name) {
151    assert (type && !name.empty());
152    mInputScalarNames.push_back(name);
153    mInputScalars.push_back(type);
154}
155
156void KernelBuilder::addInputScalar(Type * const type) {
157    addInputScalar(type, std::move(mKernelName + "_inputscalar_" + std::to_string(mInputScalars.size())));
158}
159
160/** ------------------------------------------------------------------------------------------------------------- *
161 * @brief prepareFunction
162 ** ------------------------------------------------------------------------------------------------------------- */
163Function * KernelBuilder::prepareFunction() {   
164    if (mCircularBufferModulo > 1) {
165        mBlockIndex = addInternalState(iBuilder->getInt32Ty());
166    }
167    const unsigned capacity = mBlocksPerSegment + mCircularBufferModulo - 1;
168
169    mInputStreamType = PointerType::get(StructType::get(mMod->getContext(), mInputStreams), 0);
170    mInputScalarType = PointerType::get(StructType::get(mMod->getContext(), mInputScalars), 0);
171    Type * outputStreamType = ArrayType::get(StructType::get(mMod->getContext(), mOutputStreams), capacity);
172    Type * outputAccumType = StructType::get(mMod->getContext(), mOutputScalar);
173    Type * internalStateType = StructType::create(mMod->getContext(), mStates, mKernelName);
174    mKernelStructType = StructType::create(mMod->getContext(),std::vector<Type *>({internalStateType, outputStreamType, outputAccumType}), "KernelStruct_"+ mKernelName);
175
176    FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()),
177        std::vector<Type *>({PointerType::get(mKernelStructType, 0), mInputStreamType}), false);
178
179    mFunction = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", mMod);
180    mFunction->setCallingConv(CallingConv::C);
181
182    Function::arg_iterator args = mFunction->arg_begin();
183    mKernelParam = args++;
184    mKernelParam->setName("this");
185    mInputParam = args++;
186    mInputParam->setName("input_stream");
187
188    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mFunction, 0));
189
190    mSegmentIndex = 0;
191
192    return mFunction;
193}
194
195/** ------------------------------------------------------------------------------------------------------------- *
196 * @brief finalize
197 ** ------------------------------------------------------------------------------------------------------------- */
198void KernelBuilder::finalize() {
199
200    // Finish the actual function
201    Value * startIdx = getInternalState(mBlockIndex);
202    Value * value = iBuilder->CreateBlockAlignedLoad(startIdx);
203    value = iBuilder->CreateAdd(value, ConstantInt::get(value->getType(), 1));
204    iBuilder->CreateBlockAlignedStore(value, startIdx);
205    iBuilder->CreateRetVoid();
206
207    // Generate the zero initializer
208    Function * initializer = cast<Function>(mMod->getOrInsertFunction(mKernelName + "_Init", Type::getVoidTy(mMod->getContext()), PointerType::get(mKernelStructType, 0), nullptr));
209    initializer->setCallingConv(CallingConv::C);
210    Function::arg_iterator args = initializer->arg_begin();
211    mKernelParam = args++;
212    mKernelParam->setName("this");
213
214    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", initializer, 0));
215
216    Type * const int64Ty = iBuilder->getInt64Ty(); // TODO: should call getIntPtrTy() instead but we don't have the data layout here.
217    for (unsigned i = 0; i < mStates.size(); ++i) {
218        Value * const gep = getInternalState(i);
219        Type * const type = gep->getType();
220        if (type->isIntegerTy() || type->isArrayTy() || type->isVectorTy()) {
221            setInternalState(i, Constant::getNullValue(type));
222        } else {           
223            Value * gep_next = iBuilder->CreateGEP(gep, iBuilder->getInt32(1));
224            Value * get_int = iBuilder->CreatePtrToInt(gep, int64Ty);
225            Value * get_next_int = iBuilder->CreatePtrToInt(gep_next, int64Ty);
226            Value * state_size = iBuilder->CreateSub(get_next_int, get_int);
227            iBuilder->CreateMemSet(gep, iBuilder->getInt8(0), state_size, 4);
228        }
229    }
230
231    iBuilder->CreateRetVoid();
232
233    // and then the constructor
234    mConstructor = cast<Function>(mMod->getOrInsertFunction(mKernelName+"_Create_Default", Type::getVoidTy(mMod->getContext()), PointerType::get(mKernelStructType, 0), int64Ty, int64Ty, nullptr));
235    mConstructor->setCallingConv(CallingConv::C);
236    args = mConstructor->arg_begin();
237
238    mKernelParam = args++;
239    mKernelParam->setName("this");
240
241    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mConstructor, 0));
242    iBuilder->CreateCall(initializer, mKernelParam);
243    iBuilder->CreateRetVoid();
244}
245
246/** ------------------------------------------------------------------------------------------------------------- *
247 * @brief generateKernelInstance
248 ** ------------------------------------------------------------------------------------------------------------- */
249Value * KernelBuilder::generateKernelInstance() {
250    mKernelStruct = iBuilder->CreateAlloca(mKernelStructType);
251    iBuilder->CreateCall3(mConstructor, mKernelStruct,
252        ConstantInt::get(iBuilder->getIntNTy(64), mBlockSize),
253        ConstantInt::get(iBuilder->getIntNTy(64), (mBlocksPerSegment + mCircularBufferModulo - 1) * mBlockSize));
254    return mKernelStruct;
255}
256
257/** ------------------------------------------------------------------------------------------------------------- *
 * @brief generateInitCall
259 ** ------------------------------------------------------------------------------------------------------------- */
260void KernelBuilder::generateInitCall() {
261    assert (mInitFunction && mKernelStruct);
262    iBuilder->CreateCall(mInitFunction, mKernelStruct);
263}
264
265/** ------------------------------------------------------------------------------------------------------------- *
266 * @brief generateDoBlockCall
267 ** ------------------------------------------------------------------------------------------------------------- */
268void KernelBuilder::generateDoBlockCall(Value * inputStreams) {
269    assert (mFunction && mKernelStruct);
270    iBuilder->CreateCall2(mFunction, mKernelStruct, iBuilder->CreatePointerCast(inputStreams, mInputStreamType));
271}
272
273/** ------------------------------------------------------------------------------------------------------------- *
 * @brief getOffset
275 *
276 * Compute the stream index of the given offset value.
277 ** ------------------------------------------------------------------------------------------------------------- */
278Value * KernelBuilder::getOffset(const unsigned value) {
279    const unsigned adjustedOffset = (mSegmentIndex + value);
280    Value * offset = iBuilder->getInt32(adjustedOffset);
281    if (mBlockIndex) {
282        Value * index = iBuilder->CreateBlockAlignedLoad(getInternalState(mBlockIndex));
283        if (adjustedOffset) {
284            index = iBuilder->CreateAdd(index, offset);
285        }
286        const unsigned bufferSize = (mBlocksPerSegment + mCircularBufferModulo - 1); assert (bufferSize > 1);
287        if (isPowerOfTwo(bufferSize)) {
288            index = iBuilder->CreateAnd(index, ConstantInt::get(index->getType(), bufferSize - 1));
289        } else {
290            index = iBuilder->CreateURem(index, ConstantInt::get(index->getType(), bufferSize));
291        }
292        // TODO: generate branch / phi node when it's sufficiently unlikely that we'll wrap around.
293        offset = index;
294    }
295    return offset;
296}
297
298/** ------------------------------------------------------------------------------------------------------------- *
299 * @brief setLongestLookaheadAmount
300 ** ------------------------------------------------------------------------------------------------------------- */
301void KernelBuilder::setLongestLookaheadAmount(const unsigned bits) {
302    const unsigned blockWidth = iBuilder->getBitBlockWidth();
303    const unsigned lookaheadBlocks = (bits + blockWidth - 1) / blockWidth;
304    mCircularBufferModulo = (lookaheadBlocks + 1);
305}
Note: See TracBrowser for help on using the repository browser.