source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 4968

Last change on this file since 4968 was 4968, checked in by nmedfort, 4 years ago

Some fixes for threading and kernel builder.

File size: 14.8 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <pablo/function.h>
8#include <IDISA/idisa_builder.h>
9#include <llvm/Support/CommandLine.h>
10
11using namespace llvm;
12using namespace pablo;
13
14static cl::opt<unsigned> SegmentSize("segment-size", cl::desc("Segment Size"), cl::value_desc("LLVM IR file"), cl::init(1));
15
// Returns true iff exactly one bit of x is set; zero is not considered a power of two.
inline bool isPowerOfTwo(const unsigned x) {
    if (x == 0) {
        return false;
    }
    // Clearing the lowest set bit leaves zero only when it was the sole set bit.
    const unsigned withoutLowestBit = x & (x - 1);
    return withoutLowestBit == 0;
}
19
20// sets name & sets internal state to the kernel superclass state
21KernelBuilder::KernelBuilder(std::string name, Module * m, IDISA::IDISA_Builder * b)
22: mMod(m)
23, iBuilder(b)
24, mKernelName(name)
25, mBitBlockType(b->getBitBlockType())
26, mBlockSize(b->getBitBlockWidth())
27, mBlocksPerSegment(SegmentSize)
28, mCircularBufferModulo(1)
29, mSegmentIndex(0)
30, mStartIndex(0) {
31    assert (mBlocksPerSegment > 0);
32    addInternalStateType(b->getInt64Ty());
33    addInternalStateType(b->getInt64Ty());
34    addInternalStateType(b->getInt64Ty());
35    addInternalStateType(b->getInt64Ty());
36}
37
38/** ------------------------------------------------------------------------------------------------------------- *
39 * @brief addInternalStateType
40 ** ------------------------------------------------------------------------------------------------------------- */
41unsigned KernelBuilder::addInternalStateType(Type * const type) {
42    assert (type);
43    const unsigned index = mStates.size();
44    mStates.push_back(type);
45    return index;
46}
47
48/** ------------------------------------------------------------------------------------------------------------- *
49 * @brief addOutputStream
50 ** ------------------------------------------------------------------------------------------------------------- */
51void KernelBuilder::addOutputStream(const unsigned fields) {
52    assert (fields > 0);
53    mOutputStreams.push_back((fields == 1) ? mBitBlockType : ArrayType::get(mBitBlockType, fields));
54}
55
56/** ------------------------------------------------------------------------------------------------------------- *
57 * @brief addOutputAccum
58 ** ------------------------------------------------------------------------------------------------------------- */
59void KernelBuilder::addOutputAccum(Type * const type) {
60    assert (type);
61    mOutputAccums.push_back(type);
62}
63
64/** ------------------------------------------------------------------------------------------------------------- *
65 * @brief addInputStream
66 ** ------------------------------------------------------------------------------------------------------------- */
67void KernelBuilder::addInputStream(const unsigned fields, std::string name) {
68    assert (fields > 0);
69    if (name.empty())
70        mInputStreamNames.push_back(mKernelName + "_inputstream_" + std::to_string(mInputStreams.size()));
71    else
72        mInputStreamNames.push_back(name);
73
74    if (fields == 1){
75        mInputStreams.push_back(mBitBlockType);
76    } else {
77        mInputStreams.push_back(ArrayType::get(mBitBlockType, fields));
78    }
79}
80
81/** ------------------------------------------------------------------------------------------------------------- *
82 * @brief addInputScalar
83 ** ------------------------------------------------------------------------------------------------------------- */
84void KernelBuilder::addInputScalar(Type * const type, std::string name) {
85    if (name.empty())
86        mInputScalarNames.push_back(mKernelName + "_inputscalar_" + std::to_string(mInputScalars.size()));
87    else
88        mInputScalarNames.push_back(name);
89
90    mInputScalars.push_back(type);
91}
92
93/** ------------------------------------------------------------------------------------------------------------- *
94 * @brief prepareFunction
95 ** ------------------------------------------------------------------------------------------------------------- */
96Function * KernelBuilder::prepareFunction() {   
97    if (mCircularBufferModulo > 1) {
98        mStartIndex = addInternalStateType(iBuilder->getInt32Ty());
99    }
100    const unsigned capacity = mBlocksPerSegment + mCircularBufferModulo - 1;
101
102    mInputStreamType = PointerType::get(StructType::get(mMod->getContext(), mInputStreams), 0);
103    mInputScalarType = PointerType::get(StructType::get(mMod->getContext(), mInputScalars), 0);
104    Type * outputStreamType = ArrayType::get(StructType::get(mMod->getContext(), mOutputStreams), capacity);
105    Type * outputAccumType = StructType::get(mMod->getContext(), mOutputAccums);
106    Type * internalStateType = StructType::create(mMod->getContext(), mStates, mKernelName);
107    mKernelStructType = StructType::create(mMod->getContext(),std::vector<Type *>({internalStateType, outputStreamType, outputAccumType}), "KernelStruct_"+ mKernelName);
108
109    FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()),
110        std::vector<Type *>({PointerType::get(mKernelStructType, 0), mInputStreamType}), false);
111
112    mFunction = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", mMod);
113    mFunction->setCallingConv(CallingConv::C);
114
115    Function::arg_iterator args = mFunction->arg_begin();
116    mKernelParam = args++;
117    mKernelParam->setName("this");
118
119    mInputParam = args++;
120    mInputParam->setName("input_stream");
121
122    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mFunction, 0));
123
124    mSegmentIndex = 0;
125
126    return mFunction;
127}
128
129/** ------------------------------------------------------------------------------------------------------------- *
130 * @brief finalize
131 ** ------------------------------------------------------------------------------------------------------------- */
132void KernelBuilder::finalize() {
133    // Finish the actual function
134    if (mCircularBufferModulo > 1) {
135        Value * startIdx = getInternalState(mStartIndex);
136        Value * value = iBuilder->CreateBlockAlignedLoad(startIdx);
137        value = iBuilder->CreateAdd(value, iBuilder->getInt32(1));
138        iBuilder->CreateBlockAlignedStore(value, startIdx);
139    }
140    iBuilder->CreateRetVoid();
141
142
143    // Generate the zero initializer
144    Function * initializer = cast<Function>(mMod->getOrInsertFunction(mKernelName + "_Init", Type::getVoidTy(mMod->getContext()), PointerType::get(mKernelStructType, 0), nullptr));
145    initializer->setCallingConv(CallingConv::C);
146    Function::arg_iterator args = initializer->arg_begin();
147
148    mKernelParam = args++;
149    mKernelParam->setName("this");
150
151    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", initializer, 0));
152
153    Type * const int64Ty = iBuilder->getInt64Ty(); // TODO: should call getIntPtrTy() instead but we don't have the data layout here.
154    for (unsigned i = 0; i < mStates.size(); ++i) {
155        Value * const gep = getInternalState(i);
156        Type * const type = gep->getType();
157        if (type->isIntegerTy() || type->isArrayTy() || type->isVectorTy()) {
158            setInternalState(i, Constant::getNullValue(type));
159        } else {           
160            Value * gep_next = iBuilder->CreateGEP(gep, iBuilder->getInt32(1));
161            Value * get_int = iBuilder->CreatePtrToInt(gep, int64Ty);
162            Value * get_next_int = iBuilder->CreatePtrToInt(gep_next, int64Ty);
163            Value * state_size = iBuilder->CreateSub(get_next_int, get_int);
164            iBuilder->CreateMemSet(gep, iBuilder->getInt8(0), state_size, 4);
165        }
166    }
167
168    iBuilder->CreateRetVoid();
169
170    // and then the constructor
171    mConstructor = cast<Function>(mMod->getOrInsertFunction(mKernelName+"_Create_Default", Type::getVoidTy(mMod->getContext()), PointerType::get(mKernelStructType, 0), int64Ty, int64Ty, nullptr));
172    mConstructor->setCallingConv(CallingConv::C);
173    args = mConstructor->arg_begin();
174
175    mKernelParam = args++;
176    mKernelParam->setName("this");
177
178    Value* block_size_param = args++;
179    block_size_param->setName("block_size");
180    Value* seg_size_param = args++;
181    seg_size_param->setName("seg_size");
182    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mConstructor, 0));
183    setInternalState(0, block_size_param);
184    setInternalState(1, seg_size_param);
185    iBuilder->CreateCall(initializer, mKernelParam);
186    iBuilder->CreateRetVoid();
187}
188
189/** ------------------------------------------------------------------------------------------------------------- *
190 * @brief generateKernelInstance
191 ** ------------------------------------------------------------------------------------------------------------- */
192Value * KernelBuilder::generateKernelInstance() {
193    mKernelStruct = iBuilder->CreateAlloca(mKernelStructType);
194    iBuilder->CreateCall3(mConstructor, mKernelStruct,
195        ConstantInt::get(iBuilder->getIntNTy(64), mBlockSize),
196        ConstantInt::get(iBuilder->getIntNTy(64), (mBlocksPerSegment + mCircularBufferModulo - 1) * mBlockSize));
197    return mKernelStruct;
198}
199
200/** ------------------------------------------------------------------------------------------------------------- *
201 * @brief getInputStream
202 ** ------------------------------------------------------------------------------------------------------------- */
203Value * KernelBuilder::getInputStream(const unsigned index, const unsigned streamOffset) {
204    Value * const indices[] = {getOffset(streamOffset), iBuilder->getInt32(index)};
205    return iBuilder->CreateGEP(mInputParam, indices);
206}
207
208/** ------------------------------------------------------------------------------------------------------------- *
209 * @brief getInputScalar
210 ** ------------------------------------------------------------------------------------------------------------- */
211Value * KernelBuilder::getInputScalar(const unsigned index) {
212    throw std::runtime_error("currently not supported!");
213}
214
215/** ------------------------------------------------------------------------------------------------------------- *
216 * @brief getKernelState
217 ** ------------------------------------------------------------------------------------------------------------- */
218Value * KernelBuilder::getKernelState(const unsigned index, const unsigned streamOffset) {
219    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(0), getOffset(streamOffset), iBuilder->getInt32(index)};
220    return iBuilder->CreateGEP(mKernelParam, indices);
221}
222
223/** ------------------------------------------------------------------------------------------------------------- *
224 * @brief getOutputStream
225 ** ------------------------------------------------------------------------------------------------------------- */
226Value * KernelBuilder::getOutputStream(const unsigned index, const unsigned streamOffset) {
227    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(1), getOffset(streamOffset), iBuilder->getInt32(index)};
228    return iBuilder->CreateGEP(mKernelParam, indices);
229}
230
231/** ------------------------------------------------------------------------------------------------------------- *
232 * @brief getOutputScalar
233 ** ------------------------------------------------------------------------------------------------------------- */
234Value * KernelBuilder::getOutputScalar(const unsigned index) {
235//    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(2), getOffset(0), iBuilder->getInt32(index)};
236//    return iBuilder->CreateGEP(mKernelParam, indices);
237    throw std::runtime_error("currently not supported!");
238}
239
240/** ------------------------------------------------------------------------------------------------------------- *
241 * @brief getInternalState
242 ** ------------------------------------------------------------------------------------------------------------- */
243Value * KernelBuilder::getInternalState(const unsigned index){
244    Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(0), iBuilder->getInt32(index)};
245    return iBuilder->CreateGEP(mKernelParam, indices);
246}
247
248/** ------------------------------------------------------------------------------------------------------------- *
249 * @brief setInternalState
250 ** ------------------------------------------------------------------------------------------------------------- */
251void KernelBuilder::setInternalState(const unsigned index, Value * const value) {
252    Value * ptr = getInternalState(index);
253    assert (ptr->getType()->getPointerElementType() == value->getType());
254    if (value->getType() == iBuilder->getBitBlockType()) {
255        iBuilder->CreateBlockAlignedStore(value, ptr);
256    } else {
257        iBuilder->CreateStore(value, ptr);
258    }
259}
260
261/** ------------------------------------------------------------------------------------------------------------- *
262 * @brief generateDoBlockCall
263 ** ------------------------------------------------------------------------------------------------------------- */
264void KernelBuilder::generateInitCall() {
265    assert (mInitFunction && mKernelStruct);
266    iBuilder->CreateCall(mInitFunction, mKernelStruct);
267}
268
269/** ------------------------------------------------------------------------------------------------------------- *
270 * @brief generateDoBlockCall
271 ** ------------------------------------------------------------------------------------------------------------- */
272void KernelBuilder::generateDoBlockCall(Value * inputStreams) {
273    assert (mFunction && mKernelStruct);
274    iBuilder->CreateCall2(mFunction, mKernelStruct, iBuilder->CreatePointerCast(inputStreams, mInputStreamType));
275}
276
277/** ------------------------------------------------------------------------------------------------------------- *
278 * @brief offset
279 *
280 * Compute the stream index of the given offset value.
281 ** ------------------------------------------------------------------------------------------------------------- */
282Value * KernelBuilder::getOffset(const unsigned value) {
283    const unsigned adjustedOffset = (mSegmentIndex + value);
284    Value * offset = iBuilder->getInt32(adjustedOffset);
285    if (mStartIndex) {
286        Value * index = iBuilder->CreateBlockAlignedLoad(getInternalState(mStartIndex));
287        if (adjustedOffset) {
288            index = iBuilder->CreateAdd(index, offset);
289        }
290        const unsigned bufferSize = (mBlocksPerSegment + mCircularBufferModulo - 1); assert (bufferSize > 1);
291        if (isPowerOfTwo(bufferSize)) {
292            index = iBuilder->CreateAnd(index, ConstantInt::get(index->getType(), bufferSize - 1));
293        } else {
294            index = iBuilder->CreateURem(index, ConstantInt::get(index->getType(), bufferSize));
295        }
296        // TODO: generate branch / phi node when it's sufficiently unlikely that we'll wrap around.
297        offset = index;
298    }
299    return offset;
300}
301
302/** ------------------------------------------------------------------------------------------------------------- *
303 * @brief setLongestLookaheadAmount
304 ** ------------------------------------------------------------------------------------------------------------- */
305void KernelBuilder::setLongestLookaheadAmount(const unsigned bits) {
306    const unsigned blockWidth = iBuilder->getBitBlockWidth();
307    const unsigned lookaheadBlocks = (bits + blockWidth - 1) / blockWidth;
308    mCircularBufferModulo = (lookaheadBlocks + 1);
309}
Note: See TracBrowser for help on using the repository browser.