source: icGREP/icgrep-devel/icgrep/kernels/kernel.cpp @ 4995

Last change on this file since 4995 was 4995, checked in by nmedfort, 3 years ago

More work on symbol table; unexpected bug with 4KiB-one page tests observed.

File size: 23.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <pablo/function.h>
8#include <IDISA/idisa_builder.h>
9#include <kernels/instance.h>
10#include <tuple>
11#include <boost/functional/hash_fwd.hpp>
12#include <unordered_map>
13
14using namespace llvm;
15using namespace pablo;
16
/// Returns true iff exactly one bit of x is set; zero is not treated as a power of two.
inline bool isPowerOfTwo(const unsigned x) {
    if (x == 0) {
        return false;
    }
    // Clearing the lowest set bit leaves zero only when a single bit was set.
    return (x & (x - 1)) == 0;
}
20
21namespace kernel {
22
// Field positions used when indexing into the kernel state struct
// (see the element order passed to StructType::create in prepareFunction).
enum : unsigned {
    INTERNAL_STATE = 0,
    INPUT_STREAM_SET = 1,
    OUTPUT_STREAM_SET = 2,
    OUTPUT_SCALAR_SET = 3
};
29
30// sets name & sets internal state to the kernel superclass state
31KernelBuilder::KernelBuilder(std::string name, Module * m, IDISA::IDISA_Builder * b, const unsigned bufferSize)
32: mMod(m)
33, iBuilder(b)
34, mKernelName(name)
35, mBitBlockType(b->getBitBlockType())
36, mBufferSize(bufferSize)
37, mBlockNoIndex(0) {
38    assert (mBufferSize > 0);
39    mBlockNoIndex = addInternalState(b->getInt64Ty(), "BlockNo");
40}
41
// Out-of-class definition of the static slab allocator member of Instance.
// NOTE(review): this should probably live in an "instance.cpp" translation unit rather than here.
SlabAllocator<Instance> Instance::mAllocator;
43
44/** ------------------------------------------------------------------------------------------------------------- *
45 * @brief addInternalState
46 ** ------------------------------------------------------------------------------------------------------------- */
47unsigned KernelBuilder::addInternalState(Type * const type) {
48    assert (type);
49    const unsigned index = mInternalState.size();
50    mInternalState.push_back(type);
51    return index;
52}
53
54unsigned KernelBuilder::addInternalState(llvm::Type * const type, std::string && name) {
55    if (LLVM_UNLIKELY(mInternalStateNameMap.count(name) != 0)) {
56        throw std::runtime_error("Kernel already contains internal state '" + name + "'");
57    }
58    const unsigned index = addInternalState(type);
59    mInternalStateNameMap.emplace(name, index);
60    return index;
61}
62
63/** ------------------------------------------------------------------------------------------------------------- *
64 * @brief getInternalState
65 ** ------------------------------------------------------------------------------------------------------------- */
66Value * KernelBuilder::getInternalState(Value * const instance, const unsigned index) {
67    assert (index < mInternalState.size());
68    return getInternalState(instance, iBuilder->getInt32(index));
69}
70
71Value * KernelBuilder::getInternalState(Value * const instance, disable_implicit_conversion<Value *> index) {
72    assert (index->getType()->isIntegerTy());
73    return iBuilder->CreateGEP(instance, {iBuilder->getInt64(0), iBuilder->getInt32(INTERNAL_STATE), index});
74}
75
76Value * KernelBuilder::getInternalState(Value * const instance, const std::string & name) {
77    const auto f = mInternalStateNameMap.find(name);
78    if (LLVM_UNLIKELY(f == mInternalStateNameMap.end())) {
79        throw std::runtime_error("Kernel does not contain internal state " + name);
80    }
81    return getInternalState(instance, f->second);
82}
83
84/** ------------------------------------------------------------------------------------------------------------- *
85 * @brief setInternalState
86 ** ------------------------------------------------------------------------------------------------------------- */
87void KernelBuilder::setInternalState(Value * const instance, const std::string & name, Value * const value) {
88    Value * ptr = getInternalState(instance, name);
89    assert (ptr->getType()->getPointerElementType() == value->getType());
90    if (value->getType() == iBuilder->getBitBlockType()) {
91        iBuilder->CreateBlockAlignedStore(value, ptr);
92    } else {
93        iBuilder->CreateStore(value, ptr);
94    }
95}
96
97void KernelBuilder::setInternalState(Value * const instance, const unsigned index, Value * const value) {
98    assert (index < mInternalState.size());
99    return setInternalState(instance, iBuilder->getInt32(index), value);
100}
101
102void KernelBuilder::setInternalState(Value * const instance, disable_implicit_conversion<Value *> index, Value * const value) {
103    Value * ptr = getInternalState(instance, index);
104    assert (ptr->getType()->getPointerElementType() == value->getType());
105    if (value->getType() == iBuilder->getBitBlockType()) {
106        iBuilder->CreateBlockAlignedStore(value, ptr);
107    } else {
108        iBuilder->CreateStore(value, ptr);
109    }
110}
111
112/** ------------------------------------------------------------------------------------------------------------- *
113 * @brief addInputStream
114 ** ------------------------------------------------------------------------------------------------------------- */
115void KernelBuilder::addInputStream(const unsigned fields, std::string && name) {
116    assert (fields > 0 && !name.empty());
117    mInputStreamName.push_back(name);
118    if (fields == 1) {
119        mInputStream.push_back(mBitBlockType);
120    } else {
121        mInputStream.push_back(ArrayType::get(mBitBlockType, fields));
122    }
123}
124
125void KernelBuilder::addInputStream(const unsigned fields) {
126    addInputStream(fields, std::move(mKernelName + "_InputStream_" + std::to_string(mInputStream.size())));
127}
128
129/** ------------------------------------------------------------------------------------------------------------- *
130 * @brief getInputStream
131 ** ------------------------------------------------------------------------------------------------------------- */
132Value * KernelBuilder::getInputStream(Value * const instance, const unsigned index, const unsigned streamOffset) {
133    assert (index < mInputStream.size());
134    return getInputStream(instance, iBuilder->getInt32(index), streamOffset);
135}
136
137Value * KernelBuilder::getInputStream(Value * const instance, disable_implicit_conversion<Value *> index, const unsigned streamOffset) {
138    assert (instance && index);
139    assert (index->getType()->isIntegerTy());
140    Value * const inputStreamSet = iBuilder->CreateLoad(iBuilder->CreateGEP(instance,
141        {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET), iBuilder->getInt32(0)}));
142    Value * modFunction = iBuilder->CreateLoad(iBuilder->CreateGEP(instance,
143        {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET), iBuilder->getInt32(1)}));
144    Value * offset = iBuilder->CreateLoad(getBlockNo(instance));
145    if (streamOffset) {
146        offset = iBuilder->CreateAdd(offset, ConstantInt::get(offset->getType(), streamOffset));
147    }
148    if (LLVM_LIKELY(isa<ConstantInt>(index.get()) || inputStreamSet->getType()->getPointerElementType()->isArrayTy())) {
149        return iBuilder->CreateGEP(inputStreamSet, { iBuilder->CreateCall(modFunction, offset), index });
150    } else {
151        throw std::runtime_error("Cannot access the input stream with a non-constant value unless all input stream types are identical!");
152    }
153}
154
155/** ------------------------------------------------------------------------------------------------------------- *
156 * @brief addInputScalar
157 ** ------------------------------------------------------------------------------------------------------------- */
158void KernelBuilder::addInputScalar(Type * const type, std::string && name) {
159    assert (type && !name.empty());
160    mInputScalarName.push_back(name);
161    mInputScalar.push_back(type);
162}
163
164void KernelBuilder::addInputScalar(Type * const type) {
165    addInputScalar(type, std::move(mKernelName + "_InputScalar_" + std::to_string(mInputScalar.size())));
166}
167
168/** ------------------------------------------------------------------------------------------------------------- *
169 * @brief getInputScalar
170 ** ------------------------------------------------------------------------------------------------------------- */
171Value * KernelBuilder::getInputScalar(Value * const instance, const unsigned) {
172    assert (instance);
173    throw std::runtime_error("currently not supported!");
174}
175
176Value * KernelBuilder::getInputScalar(Value * const instance, disable_implicit_conversion<Value *>) {
177    assert (instance);
178    throw std::runtime_error("currently not supported!");
179}
180
181/** ------------------------------------------------------------------------------------------------------------- *
182 * @brief addOutputStream
183 ** ------------------------------------------------------------------------------------------------------------- */
184unsigned KernelBuilder::addOutputStream(const unsigned fields) {
185    assert (fields > 0);
186    const unsigned index = mOutputStream.size();
187    mOutputStream.push_back((fields == 1) ? mBitBlockType : ArrayType::get(mBitBlockType, fields));
188    return index;
189}
190
191/** ------------------------------------------------------------------------------------------------------------- *
192 * @brief addOutputScalar
193 ** ------------------------------------------------------------------------------------------------------------- */
194unsigned KernelBuilder::addOutputScalar(Type * const type) {
195    assert (type);
196    const unsigned index = mOutputScalar.size();
197    mOutputScalar.push_back(type);
198    return index;
199}
200
201/** ------------------------------------------------------------------------------------------------------------- *
202 * @brief getOutputStream
203 ** ------------------------------------------------------------------------------------------------------------- */
204Value * KernelBuilder::getOutputStream(Value * const instance, const unsigned index, const unsigned streamOffset) {
205    assert (index < mOutputStream.size());
206    return getOutputStream(instance, iBuilder->getInt32(index), streamOffset);
207}
208
209Value * KernelBuilder::getOutputStream(Value * const instance, disable_implicit_conversion<Value *> index, const unsigned streamOffset) {
210    assert (instance && index);
211    assert (index->getType()->isIntegerTy());
212    if (LLVM_LIKELY(isa<ConstantInt>(index.get()))) {
213        return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET), getStreamOffset(instance, streamOffset), index});
214    } else {
215        Value * const outputStreamSet = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET)});
216        if (LLVM_LIKELY(outputStreamSet->getType()->getPointerElementType()->isArrayTy())) {
217            return iBuilder->CreateGEP(outputStreamSet, {getStreamOffset(instance, streamOffset), index});
218        }
219    }
220    throw std::runtime_error("Cannot access the output stream with a non-constant value unless all output stream types are identical!");
221}
222
223/** ------------------------------------------------------------------------------------------------------------- *
224 * @brief getOutputScalar
225 ** ------------------------------------------------------------------------------------------------------------- */
226Value * KernelBuilder::getOutputScalar(Value * const instance, const unsigned) {
227    throw std::runtime_error("currently not supported!");
228}
229
230Value * KernelBuilder::getOutputScalar(Value * const instance, disable_implicit_conversion<Value *> ) {
231    throw std::runtime_error("currently not supported!");
232}
233
234/** ------------------------------------------------------------------------------------------------------------- *
235 * @brief packDataTypes
236 ** ------------------------------------------------------------------------------------------------------------- */
237llvm::Type * KernelBuilder::packDataTypes(const std::vector<llvm::Type *> & types) {
238    bool canPackIntoArray = !types.empty();
239    for (Type * type : types) {
240        if (type != types.front()) { // use canLosslesslyBitcastInto ?
241            canPackIntoArray = false;
242            break;
243        }
244    }
245    if (canPackIntoArray) {
246        return ArrayType::get(types.front(), types.size());
247    } else {
248        return StructType::get(mMod->getContext(), types);
249    }
250}
251
252/** ------------------------------------------------------------------------------------------------------------- *
253 * @brief prepareFunction
254 ** ------------------------------------------------------------------------------------------------------------- */
255Function * KernelBuilder::prepareFunction() {
256
257    PointerType * modFunctionType = PointerType::get(FunctionType::get(iBuilder->getInt64Ty(), {iBuilder->getInt64Ty()}, false), 0);
258    mInputStreamType = PointerType::get(packDataTypes(mInputStream), 0);
259    mInputScalarType = PointerType::get(packDataTypes(mInputScalar), 0);
260    mOutputStreamType = packDataTypes(mOutputStream);
261    Type * outputScalarType = packDataTypes(mOutputScalar);
262    Type * internalStateType = packDataTypes(mInternalState);
263    Type * inputStateType = StructType::create(mMod->getContext(), { mInputStreamType, modFunctionType});
264    Type * outputBufferType = ArrayType::get(mOutputStreamType, mBufferSize);
265    mKernelStateType = StructType::create(mMod->getContext(), {internalStateType, inputStateType, outputBufferType, outputScalarType}, mKernelName);
266
267    FunctionType * const functionType = FunctionType::get(iBuilder->getVoidTy(), {PointerType::get(mKernelStateType, 0)}, false);
268    mDoBlock = Function::Create(functionType, GlobalValue::ExternalLinkage, mKernelName + "_DoBlock", mMod);
269    mDoBlock->setCallingConv(CallingConv::C);   
270    mDoBlock->setDoesNotCapture(1);
271    mDoBlock->setDoesNotThrow();
272
273    Function::arg_iterator args = mDoBlock->arg_begin();
274    mKernelState = args++;
275    mKernelState->setName("this");
276
277    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mDoBlock, 0));
278
279    return mDoBlock;
280}
281
282/** ------------------------------------------------------------------------------------------------------------- *
283 * @brief finalize
284 ** ------------------------------------------------------------------------------------------------------------- */
285void KernelBuilder::finalize() {
286
287    // Finish the actual function
288    Value * blockNo = getBlockNo();
289    Value * value = iBuilder->CreateLoad(blockNo);
290    value = iBuilder->CreateAdd(value, ConstantInt::get(value->getType(), 1));
291    iBuilder->CreateStore(value, blockNo);
292    iBuilder->CreateRetVoid();
293
294    eliminateRedundantMemoryOperations(mDoBlock);
295
296    // Generate the zero initializer
297    PointerType * modFunctionType = PointerType::get(FunctionType::get(iBuilder->getInt64Ty(), {iBuilder->getInt64Ty()}, false), 0);
298    FunctionType * constructorType = FunctionType::get(iBuilder->getVoidTy(), {PointerType::get(mKernelStateType, 0), mInputStreamType, modFunctionType}, false);
299
300    mConstructor = Function::Create(constructorType, GlobalValue::ExternalLinkage, mKernelName + "_Constructor", mMod);
301    mConstructor->setCallingConv(CallingConv::C);
302    mDoBlock->setDoesNotCapture(1);
303    mConstructor->addAttribute(AttributeSet::FunctionIndex, Attribute::InlineHint);
304    mDoBlock->setDoesNotThrow();
305
306    auto args = mConstructor->arg_begin();
307    mKernelState = args++;
308    mKernelState->setName("this");
309    Value * const inputStream = args++;
310    inputStream->setName("inputStream");
311    Value * const modFunction = args++;
312    modFunction->setName("modFunction");
313
314    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mConstructor, 0));
315    for (unsigned i = 0; i < mInternalState.size(); ++i) {
316        Type * const type = mInternalState[i];
317        if (type->isSized()) {
318            setInternalState(i, Constant::getNullValue(type));
319        } else {
320            Value * const ptr = getInternalState(i);
321            Value * const size = iBuilder->CreatePtrDiff(iBuilder->CreateGEP(ptr, iBuilder->getInt32(1)), ptr);
322            iBuilder->CallPrintInt(mKernelName + "_zeroinit_" + std::to_string(i), size);
323            iBuilder->CreateMemSet(ptr, iBuilder->getInt8(0), size, 4);
324        }
325    }
326
327    Value * const input = iBuilder->CreateGEP(mKernelState, {iBuilder->getInt32(0), iBuilder->getInt32(INPUT_STREAM_SET)});
328    iBuilder->CreateStore(inputStream, iBuilder->CreateGEP(input, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
329    iBuilder->CreateStore(modFunction, iBuilder->CreateGEP(input, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
330    iBuilder->CreateRetVoid();
331
332//    if (mOutputStreamType->getStructNumElements()) {
333//        PointerType * outputStreamType = PointerType::get(mOutputStreamType, 0);
334//        FunctionType * type = FunctionType::get(outputStreamType, {outputStreamType, PointerType::get(blockNo->getType(), 0)}, false);
335//        mStreamSetFunction = Function::Create(type, Function::ExternalLinkage, mKernelName + "_StreamSet", mMod);
336//        auto arg = mStreamSetFunction->arg_begin();
337//        Value * stream = arg++;
338//        stream->setName("stream");
339//        mStreamSetFunction->addAttribute(1, Attribute::NoCapture);
340//        mStreamSetFunction->addAttribute(2, Attribute::NoCapture);
341//        mStreamSetFunction->addAttribute(AttributeSet::FunctionIndex, Attribute::InlineHint);
342//        mStreamSetFunction->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
343//        Value * offset = arg;
344//        BasicBlock * entry = BasicBlock::Create(mMod->getContext(), "entry", mStreamSetFunction);
345//        iBuilder->SetInsertPoint(entry);
346//        if (mBufferSize != 1) {
347//            offset = iBuilder->CreateLoad(offset);
348//            if (isPowerOfTwo(mBufferSize)) {
349//                offset = iBuilder->CreateAnd(offset, iBuilder->getInt64(mBufferSize - 1));
350//            } else if (mBufferSize > 2) {
351//                offset = iBuilder->CreateURem(offset, iBuilder->getInt64(mBufferSize));
352//            }
353//            stream = iBuilder->CreateGEP(stream, offset);
354//        }
355//        iBuilder->CreateRet(stream);
356//    }
357
358    iBuilder->ClearInsertionPoint();
359}
360
/** ------------------------------------------------------------------------------------------------------------- *
 * @brief eliminateRedundantMemoryOperations
 *
 * Currently a no-op: the body is empty and the function argument is unused. It is invoked from finalize() on the
 * DoBlock function once its terminator has been emitted.
 ** ------------------------------------------------------------------------------------------------------------- */
inline void KernelBuilder::eliminateRedundantMemoryOperations(Function * const function) {

    // intentionally empty (placeholder)

}
368
369/** ------------------------------------------------------------------------------------------------------------- *
370 * @brief instantiate
371 *
372 * Generate a new instance of this kernel and call the default constructor to initialize it
373 ** ------------------------------------------------------------------------------------------------------------- */
374Instance * KernelBuilder::instantiate(std::pair<Value *, unsigned> && inputStream) {
375    AllocaInst * const memory = iBuilder->CreateAlloca(mKernelStateType);
376    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET)};
377    Value * ptr = iBuilder->CreateGEP(std::get<0>(inputStream), indices);
378    iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(ptr, mInputStreamType), CreateModFunction(std::get<1>(inputStream)));
379    return new Instance(this, memory);
380}
381
382/** ------------------------------------------------------------------------------------------------------------- *
383 * @brief instantiate
384 *
385 * Generate a new instance of this kernel and call the default constructor to initialize it
386 ** ------------------------------------------------------------------------------------------------------------- */
387Instance * KernelBuilder::instantiate(llvm::Value * const inputStream) {
388    AllocaInst * const memory = iBuilder->CreateAlloca(mKernelStateType);
389    iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(inputStream, mInputStreamType), CreateModFunction(0));
390    return new Instance(this, memory);
391}
392
393/** ------------------------------------------------------------------------------------------------------------- *
394 * @brief instantiate
395 *
396 * Generate a new instance of this kernel and call the default constructor to initialize it
397 ** ------------------------------------------------------------------------------------------------------------- */
398Instance * KernelBuilder::instantiate(std::initializer_list<llvm::Value *> inputStreams) {
399    if (mInputStreamType->getStructNumElements() != inputStreams.size()) {
400        throw std::runtime_error(mKernelName + ".instantiate expected " + std::to_string(inputStreams.size()) +
401                                 "elements but was given " + std::to_string(mInputStreamType->getStructNumElements()));
402    }
403    AllocaInst * const memory = iBuilder->CreateAlloca(mKernelStateType);
404    AllocaInst * inputStruct = iBuilder->CreateAlloca(mInputStreamType, 0);
405    unsigned i = 0;
406    for (Value * inputStream : inputStreams) {
407        Value * ptr = iBuilder->CreateGEP(inputStruct, { iBuilder->getInt32(0), iBuilder->getInt32(i++)});
408        iBuilder->CreateStore(inputStream, ptr);
409    }
410    iBuilder->CreateCall3(mConstructor, memory, iBuilder->CreatePointerCast(inputStruct, mInputStreamType), CreateModFunction(0));
411    return new Instance(this, memory);
412}
413
414/** ------------------------------------------------------------------------------------------------------------- *
415 * @brief CreateDoBlockCall
416 ** ------------------------------------------------------------------------------------------------------------- */
417void KernelBuilder::CreateDoBlockCall(Value * const instance) {
418    assert (mDoBlock && instance);
419    iBuilder->CreateCall(mDoBlock, instance);
420}
421
422/** ------------------------------------------------------------------------------------------------------------- *
423 * @brief clearOutputStreamSet
424 *
425 * Zero out the i + streamOffset stream set memory, where i is the current stream set indicated by the BlockNo.
426 ** ------------------------------------------------------------------------------------------------------------- */
427void KernelBuilder::clearOutputStreamSet(Value * const instance, const unsigned streamOffset) {
428    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(OUTPUT_STREAM_SET), getStreamOffset(instance, streamOffset)};
429    Value * ptr = iBuilder->CreateGEP(instance, indices);
430    unsigned size = 0;
431    for (unsigned i = 0; i < mOutputStreamType->getStructNumElements(); ++i) {
432        size += mOutputStreamType->getStructElementType(i)->getPrimitiveSizeInBits();
433    }
434    iBuilder->CreateMemSet(ptr, iBuilder->getInt8(0), size / 8, 4);
435}
436
437/** ------------------------------------------------------------------------------------------------------------- *
438 * @brief offset
439 *
440 * Compute the stream index of the given offset value.
441 ** ------------------------------------------------------------------------------------------------------------- */
442Value * KernelBuilder::getStreamOffset(Value * const instance, const unsigned index) {
443    Value * offset = nullptr;
444    if (mBufferSize > 1) {
445        offset = iBuilder->CreateLoad(getBlockNo(instance));
446        if (index) {
447            offset = iBuilder->CreateAdd(offset, iBuilder->getInt64(index));
448        }
449        if (isPowerOfTwo(mBufferSize)) {
450            offset = iBuilder->CreateAnd(offset, iBuilder->getInt64(mBufferSize - 1));
451        } else {
452            offset = iBuilder->CreateURem(offset, iBuilder->getInt64(mBufferSize));
453        }
454    } else {
455        offset = iBuilder->getInt64(index);
456    }
457    return offset;
458}
459
460/** ------------------------------------------------------------------------------------------------------------- *
461 * @brief CreateModFunction
462 *
463 * Generate a "modulo" function that dictates the local offset of a given blockNo
464 ** ------------------------------------------------------------------------------------------------------------- */
465inline Function * KernelBuilder::CreateModFunction(const unsigned size) {
466    const std::string name((size == 0) ? "continuous" : "finite" + std::to_string(size));
467    Function * function = mMod->getFunction(name);
468    if (function) {
469        return function;
470    }
471    const auto ip = iBuilder->saveIP();
472    FunctionType * type = FunctionType::get(iBuilder->getInt64Ty(), {iBuilder->getInt64Ty()}, false);
473    function = Function::Create(type, Function::ExternalLinkage, name, mMod);
474    Value * offset = function->arg_begin();
475    offset->setName("index");
476    BasicBlock * entry = BasicBlock::Create(mMod->getContext(), "entry", function);
477    iBuilder->SetInsertPoint(entry);
478    if (size) {
479        if (size == 1) {
480            offset = iBuilder->getInt64(0);
481        } else if (isPowerOfTwo(size)) {
482            offset = iBuilder->CreateAnd(offset, iBuilder->getInt64(size - 1));
483        } else {
484            offset = iBuilder->CreateURem(offset, iBuilder->getInt64(size));
485        }
486    }
487    iBuilder->CreateRet(offset);
488    iBuilder->restoreIP(ip);
489    return function;
490}
491
492} // end of namespace kernel
Note: See TracBrowser for help on using the repository browser.