Ignore:
Timestamp:
Mar 10, 2016, 4:10:02 PM (4 years ago)
Author:
nmedfort
Message:

Some fixes for threading and kernel builder.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r4959 r4968  
    77#include <pablo/function.h>
    88#include <IDISA/idisa_builder.h>
     9#include <llvm/Support/CommandLine.h>
    910
    1011using namespace llvm;
    1112using namespace pablo;
     13
     14static cl::opt<unsigned> SegmentSize("segment-size", cl::desc("Segment Size"), cl::value_desc("LLVM IR file"), cl::init(1));
    1215
    1316inline bool isPowerOfTwo(const unsigned x) {
     
    2225, mBitBlockType(b->getBitBlockType())
    2326, mBlockSize(b->getBitBlockWidth())
    24 , mBlocksPerSegment(1)
     27, mBlocksPerSegment(SegmentSize)
    2528, mCircularBufferModulo(1)
    2629, mSegmentIndex(0)
    2730, mStartIndex(0) {
     31    assert (mBlocksPerSegment > 0);
    2832    addInternalStateType(b->getInt64Ty());
    2933    addInternalStateType(b->getInt64Ty());
     
    3236}
    3337
    34 unsigned KernelBuilder::addInternalStateType(Type * type){
    35     unsigned idx = mStates.size();
     38/** ------------------------------------------------------------------------------------------------------------- *
     39 * @brief addInternalStateType
     40 ** ------------------------------------------------------------------------------------------------------------- */
     41unsigned KernelBuilder::addInternalStateType(Type * const type) {
     42    assert (type);
     43    const unsigned index = mStates.size();
    3644    mStates.push_back(type);
    37     return idx;
    38 }
    39 void KernelBuilder::addOutputStream(const unsigned fields){
    40     if (fields == 1){
    41         mOutputStreams.push_back(mBitBlockType);
    42     }
    43     else {
    44         mOutputStreams.push_back(ArrayType::get(mBitBlockType, fields));
    45     }
    46 
    47 }
    48 void KernelBuilder::addOutputAccum(Type * t){
    49     mOutputAccums.push_back(t);
    50 
    51 }
    52 void KernelBuilder::addInputStream(const unsigned fields, std::string name){
     45    return index;
     46}
     47
     48/** ------------------------------------------------------------------------------------------------------------- *
     49 * @brief addOutputStream
     50 ** ------------------------------------------------------------------------------------------------------------- */
     51void KernelBuilder::addOutputStream(const unsigned fields) {
     52    assert (fields > 0);
     53    mOutputStreams.push_back((fields == 1) ? mBitBlockType : ArrayType::get(mBitBlockType, fields));
     54}
     55
     56/** ------------------------------------------------------------------------------------------------------------- *
     57 * @brief addOutputAccum
     58 ** ------------------------------------------------------------------------------------------------------------- */
     59void KernelBuilder::addOutputAccum(Type * const type) {
     60    assert (type);
     61    mOutputAccums.push_back(type);
     62}
     63
     64/** ------------------------------------------------------------------------------------------------------------- *
     65 * @brief addInputStream
     66 ** ------------------------------------------------------------------------------------------------------------- */
     67void KernelBuilder::addInputStream(const unsigned fields, std::string name) {
     68    assert (fields > 0);
    5369    if (name.empty())
    5470        mInputStreamNames.push_back(mKernelName + "_inputstream_" + std::to_string(mInputStreams.size()));
     
    6278    }
    6379}
    64 void KernelBuilder::addInputScalar(Type * t, std::string name){
     80
     81/** ------------------------------------------------------------------------------------------------------------- *
     82 * @brief addInputScalar
     83 ** ------------------------------------------------------------------------------------------------------------- */
     84void KernelBuilder::addInputScalar(Type * const type, std::string name) {
    6585    if (name.empty())
    6686        mInputScalarNames.push_back(mKernelName + "_inputscalar_" + std::to_string(mInputScalars.size()));
     
    6888        mInputScalarNames.push_back(name);
    6989
    70     mInputScalars.push_back(t);
     90    mInputScalars.push_back(type);
    7191}
    7292
     
    7999    }
    80100    const unsigned capacity = mBlocksPerSegment + mCircularBufferModulo - 1;
    81     mInputStreamType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), mInputStreams), capacity), 0);
     101
     102    mInputStreamType = PointerType::get(StructType::get(mMod->getContext(), mInputStreams), 0);
    82103    mInputScalarType = PointerType::get(StructType::get(mMod->getContext(), mInputScalars), 0);
    83104    Type * outputStreamType = ArrayType::get(StructType::get(mMod->getContext(), mOutputStreams), capacity);
    84105    Type * outputAccumType = StructType::get(mMod->getContext(), mOutputAccums);
    85     Type * stateType = StructType::create(mMod->getContext(), mStates, mKernelName);
    86     mKernelStructType = StructType::create(mMod->getContext(),std::vector<Type *>({stateType, outputStreamType, outputAccumType}), "KernelStruct_"+ mKernelName);
     106    Type * internalStateType = StructType::create(mMod->getContext(), mStates, mKernelName);
     107    mKernelStructType = StructType::create(mMod->getContext(),std::vector<Type *>({internalStateType, outputStreamType, outputAccumType}), "KernelStruct_"+ mKernelName);
    87108
    88109    FunctionType * functionType = FunctionType::get(Type::getVoidTy(mMod->getContext()),
     
    110131 ** ------------------------------------------------------------------------------------------------------------- */
    111132void KernelBuilder::finalize() {
    112     Type * const int64Ty = iBuilder->getInt64Ty();
    113 
    114133    // Finish the actual function
    115134    if (mCircularBufferModulo > 1) {
    116135        Value * startIdx = getInternalState(mStartIndex);
    117         Value * value = iBuilder->CreateAdd(iBuilder->CreateBlockAlignedLoad(startIdx), iBuilder->getInt32(1));
     136        Value * value = iBuilder->CreateBlockAlignedLoad(startIdx);
     137        value = iBuilder->CreateAdd(value, iBuilder->getInt32(1));
    118138        iBuilder->CreateBlockAlignedStore(value, startIdx);
    119139    }
     
    131151    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", initializer, 0));
    132152
     153    Type * const int64Ty = iBuilder->getInt64Ty(); // TODO: should call getIntPtrTy() instead but we don't have the data layout here.
    133154    for (unsigned i = 0; i < mStates.size(); ++i) {
    134155        Value * const gep = getInternalState(i);
     
    136157        if (type->isIntegerTy() || type->isArrayTy() || type->isVectorTy()) {
    137158            setInternalState(i, Constant::getNullValue(type));
    138         } else {
     159        } else {           
    139160            Value * gep_next = iBuilder->CreateGEP(gep, iBuilder->getInt32(1));
    140161            Value * get_int = iBuilder->CreatePtrToInt(gep, int64Ty);
     
    160181    seg_size_param->setName("seg_size");
    161182    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mConstructor, 0));
    162     iBuilder->CreateStore(block_size_param, getInternalState(0));
    163     iBuilder->CreateStore(seg_size_param, getInternalState(1));
     183    setInternalState(0, block_size_param);
     184    setInternalState(1, seg_size_param);
    164185    iBuilder->CreateCall(initializer, mKernelParam);
    165186    iBuilder->CreateRetVoid();
     
    173194    iBuilder->CreateCall3(mConstructor, mKernelStruct,
    174195        ConstantInt::get(iBuilder->getIntNTy(64), mBlockSize),
    175         ConstantInt::get(iBuilder->getIntNTy(64), (mBlocksPerSegment + mCircularBufferModulo) * mBlockSize));
     196        ConstantInt::get(iBuilder->getIntNTy(64), (mBlocksPerSegment + mCircularBufferModulo - 1) * mBlockSize));
    176197    return mKernelStruct;
    177 
    178 }
    179 
     198}
     199
     200/** ------------------------------------------------------------------------------------------------------------- *
     201 * @brief getInputStream
     202 ** ------------------------------------------------------------------------------------------------------------- */
    180203Value * KernelBuilder::getInputStream(const unsigned index, const unsigned streamOffset) {
    181     Value * const indices[] = {iBuilder->getInt32(0), getOffset(streamOffset), iBuilder->getInt32(index)};
     204    Value * const indices[] = {getOffset(streamOffset), iBuilder->getInt32(index)};
    182205    return iBuilder->CreateGEP(mInputParam, indices);
    183206}
    184207
     208/** ------------------------------------------------------------------------------------------------------------- *
     209 * @brief getInputScalar
     210 ** ------------------------------------------------------------------------------------------------------------- */
     211Value * KernelBuilder::getInputScalar(const unsigned index) {
     212    throw std::runtime_error("currently not supported!");
     213}
     214
     215/** ------------------------------------------------------------------------------------------------------------- *
     216 * @brief getKernelState
     217 ** ------------------------------------------------------------------------------------------------------------- */
    185218Value * KernelBuilder::getKernelState(const unsigned index, const unsigned streamOffset) {
    186219    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(0), getOffset(streamOffset), iBuilder->getInt32(index)};
     
    188221}
    189222
     223/** ------------------------------------------------------------------------------------------------------------- *
     224 * @brief getOutputStream
     225 ** ------------------------------------------------------------------------------------------------------------- */
    190226Value * KernelBuilder::getOutputStream(const unsigned index, const unsigned streamOffset) {
    191227    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(1), getOffset(streamOffset), iBuilder->getInt32(index)};
     
    193229}
    194230
    195 Value * KernelBuilder::getOutputScalar(const unsigned index, const unsigned streamOffset) {
    196     Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(2), getOffset(streamOffset), iBuilder->getInt32(index)};
    197     return iBuilder->CreateGEP(mKernelParam, indices);
    198 }
    199 
     231/** ------------------------------------------------------------------------------------------------------------- *
     232 * @brief getOutputScalar
     233 ** ------------------------------------------------------------------------------------------------------------- */
     234Value * KernelBuilder::getOutputScalar(const unsigned index) {
     235//    Value * const indices[] = {iBuilder->getInt32(0), iBuilder->getInt32(2), getOffset(0), iBuilder->getInt32(index)};
     236//    return iBuilder->CreateGEP(mKernelParam, indices);
     237    throw std::runtime_error("currently not supported!");
     238}
     239
     240/** ------------------------------------------------------------------------------------------------------------- *
     241 * @brief getInternalState
     242 ** ------------------------------------------------------------------------------------------------------------- */
    200243Value * KernelBuilder::getInternalState(const unsigned index){
    201244    Value* indices[] = {iBuilder->getInt64(0), iBuilder->getInt32(0), iBuilder->getInt32(index)};
     
    203246}
    204247
     248/** ------------------------------------------------------------------------------------------------------------- *
     249 * @brief setInternalState
     250 ** ------------------------------------------------------------------------------------------------------------- */
    205251void KernelBuilder::setInternalState(const unsigned index, Value * const value) {
    206     iBuilder->CreateBlockAlignedStore(value, getInternalState(index));
    207 }
    208 
    209 void KernelBuilder::generateInitCall(){
     252    Value * ptr = getInternalState(index);
     253    assert (ptr->getType()->getPointerElementType() == value->getType());
     254    if (value->getType() == iBuilder->getBitBlockType()) {
     255        iBuilder->CreateBlockAlignedStore(value, ptr);
     256    } else {
     257        iBuilder->CreateStore(value, ptr);
     258    }
     259}
     260
     261/** ------------------------------------------------------------------------------------------------------------- *
     262 * @brief generateInitCall
     263 ** ------------------------------------------------------------------------------------------------------------- */
     264void KernelBuilder::generateInitCall() {
     265    assert (mInitFunction && mKernelStruct);
    210266    iBuilder->CreateCall(mInitFunction, mKernelStruct);
    211267}
    212268
    213 void KernelBuilder::generateDoBlockCall(Value * inputStreams){
    214     iBuilder->CreateCall2(mFunction, mKernelStruct, inputStreams);
     269/** ------------------------------------------------------------------------------------------------------------- *
     270 * @brief generateDoBlockCall
     271 ** ------------------------------------------------------------------------------------------------------------- */
     272void KernelBuilder::generateDoBlockCall(Value * inputStreams) {
     273    assert (mFunction && mKernelStruct);
     274    iBuilder->CreateCall2(mFunction, mKernelStruct, iBuilder->CreatePointerCast(inputStreams, mInputStreamType));
    215275}
    216276
     
    218278 * @brief getOffset
    219279 *
    220  * Compute the index of the given offset value.
    221  ** ------------------------------------------------------------------------------------------------------------- */
    222 Value * KernelBuilder::getOffset(const unsigned offset) {
    223     Value * index = iBuilder->getInt32(mSegmentIndex + offset);
     280 * Compute the stream index of the given offset value.
     281 ** ------------------------------------------------------------------------------------------------------------- */
     282Value * KernelBuilder::getOffset(const unsigned value) {
     283    const unsigned adjustedOffset = (mSegmentIndex + value);
     284    Value * offset = iBuilder->getInt32(adjustedOffset);
    224285    if (mStartIndex) {
    225         index = iBuilder->CreateAdd(iBuilder->CreateBlockAlignedLoad(getInternalState(mStartIndex)), index);
    226         const unsigned capacity = (mBlocksPerSegment + mCircularBufferModulo);
    227         if (isPowerOfTwo(capacity)) {
    228             index = iBuilder->CreateAnd(index, ConstantInt::get(index->getType(), capacity - 1));
     286        Value * index = iBuilder->CreateBlockAlignedLoad(getInternalState(mStartIndex));
     287        if (adjustedOffset) {
     288            index = iBuilder->CreateAdd(index, offset);
     289        }
     290        const unsigned bufferSize = (mBlocksPerSegment + mCircularBufferModulo - 1); assert (bufferSize > 1);
     291        if (isPowerOfTwo(bufferSize)) {
     292            index = iBuilder->CreateAnd(index, ConstantInt::get(index->getType(), bufferSize - 1));
    229293        } else {
    230             index = iBuilder->CreateURem(index, ConstantInt::get(index->getType(), capacity));
     294            index = iBuilder->CreateURem(index, ConstantInt::get(index->getType(), bufferSize));
    231295        }
    232296        // TODO: generate branch / phi node when it's sufficiently unlikely that we'll wrap around.
    233     }
    234     return index;
    235 }
    236 
     297        offset = index;
     298    }
     299    return offset;
     300}
     301
     302/** ------------------------------------------------------------------------------------------------------------- *
     303 * @brief setLongestLookaheadAmount
     304 ** ------------------------------------------------------------------------------------------------------------- */
     305void KernelBuilder::setLongestLookaheadAmount(const unsigned bits) {
     306    const unsigned blockWidth = iBuilder->getBitBlockWidth();
     307    const unsigned lookaheadBlocks = (bits + blockWidth - 1) / blockWidth;
     308    mCircularBufferModulo = (lookaheadBlocks + 1);
     309}
Note: See TracChangeset for help on using the changeset viewer.