Changeset 6244


Ignore:
Timestamp:
Dec 17, 2018, 5:20:48 PM (5 weeks ago)
Author:
nmedfort
Message:

Potential workaround for LLVM bug on 32-bit VM on 64-bit arch.

Location:
icGREP/icgrep-devel/icgrep
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r6241 r6244  
    691691LoadInst * CBuilder::CreateAtomicLoadAcquire(Value * ptr) {
    692692    const auto alignment = ptr->getType()->getPointerElementType()->getPrimitiveSizeInBits() / 8;
    693     LoadInst * inst = CreateAlignedLoad(ptr, alignment);
     693    LoadInst * inst = CreateAlignedLoad(ptr, alignment, true);
    694694    inst->setOrdering(AtomicOrdering::Acquire);
    695695    return inst;
     
    699699StoreInst * CBuilder::CreateAtomicStoreRelease(Value * val, Value * ptr) {
    700700    const auto alignment = ptr->getType()->getPointerElementType()->getPrimitiveSizeInBits() / 8;
    701     StoreInst * inst = CreateAlignedStore(val, ptr, alignment);
     701    StoreInst * inst = CreateAlignedStore(val, ptr, alignment, true);
    702702    inst->setOrdering(AtomicOrdering::Release);
    703703    return inst;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r6241 r6244  
    104104    IntegerType * const sizeTy = b->getSizeTy();
    105105
    106     addInternalScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
     106//    addInternalScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
    107107
    108108    // TODO: if we had a way of easily calculating the number of processed/produced items of the
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

    r6241 r6244  
    357357void PipelineCompiler::synchronize(BuilderRef b) {
    358358
     359    b->setKernel(mPipelineKernel);
    359360    const auto prefix = makeKernelName(mKernelIndex);
    360     b->setKernel(mPipelineKernel);
     361    const auto serialize = codegen::DebugOptionIsSet(codegen::SerializeThreads);
     362    const unsigned waitingOnIdx = serialize ? (mPipeline.size() - 1) : mKernelIndex;
     363    const auto waitingOn = makeKernelName(waitingOnIdx);
     364    Value * const waitingOnPtr = b->getScalarFieldPtr(waitingOn + LOGICAL_SEGMENT_NO_SCALAR);
    361365    BasicBlock * const kernelWait = b->CreateBasicBlock(prefix + "Wait", mPipelineEnd);
    362366    b->CreateBr(kernelWait);
    363367
    364368    b->SetInsertPoint(kernelWait);
    365     const auto serialize = codegen::DebugOptionIsSet(codegen::SerializeThreads);
    366     const unsigned waitingOnIdx = serialize ? mPipeline.size() - 1 : mKernelIndex;
    367     const auto waitingOn = makeKernelName(waitingOnIdx);
    368     Value * const waitingOnPtr = b->getScalarFieldPtr(waitingOn + LOGICAL_SEGMENT_NO_SCALAR);
    369369    Value * const processedSegmentCount = b->CreateAtomicLoadAcquire(waitingOnPtr);
    370370    assert (processedSegmentCount->getType() == mSegNo->getType());
    371371    Value * const ready = b->CreateICmpEQ(mSegNo, processedSegmentCount);
    372 
    373     BasicBlock * const kernelCheck = b->CreateBasicBlock(prefix + "Check", mPipelineEnd);
    374     b->CreateCondBr(ready, kernelCheck, kernelWait);
    375 
    376     b->SetInsertPoint(kernelCheck);
     372    BasicBlock * const kernelStart = b->CreateBasicBlock(prefix + "Start", mPipelineEnd);
     373    b->CreateCondBr(ready, kernelStart, kernelWait);
     374
     375    b->SetInsertPoint(kernelStart);
    377376    b->setKernel(mKernel);
    378377}
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_logic.hpp

    r6241 r6244  
    1010 ** ------------------------------------------------------------------------------------------------------------- */
    1111void PipelineCompiler::generateSingleThreadKernelMethod(BuilderRef b) {
    12     Value * const localState = b->CreateCacheAlignedAlloca(getLocalStateType(b));
     12
     13    StructType * const localStateType = getLocalStateType(b);
     14    Value * const localState = b->CreateCacheAlignedAlloca(localStateType);
     15    b->CreateMemZero(localState, ConstantExpr::getSizeOf(localStateType), b->getCacheAlignment());
    1316    allocateThreadLocalState(b, localState);
    1417    setThreadLocalState(b, localState);
     
    5659    const unsigned threads = numOfThreads - 1;
    5760    Type * const pthreadsTy = ArrayType::get(sizeTy, threads);
    58     AllocaInst * const pthreads = b->CreateAlloca(pthreadsTy);
     61    AllocaInst * const pthreads = b->CreateCacheAlignedAlloca(pthreadsTy);
    5962    std::vector<Value *> threadIdPtr(threads);
    6063    std::vector<Value *> threadState(threads);
     
    114117}
    115118
     119enum : int {
     120    HANDLE_INDEX = 0
     121    , SEGMENT_OFFSET_INDEX = 1
     122    , LOCAL_STATE_INDEX = 2
     123    , FIRST_STREAM_INDEX = 3
     124};
     125
    116126/** ------------------------------------------------------------------------------------------------------------- *
    117127 * @brief getThreadStateType
     
    119129inline StructType * PipelineCompiler::getThreadStateType(BuilderRef b) {
    120130    std::vector<Type *> threadStructFields;
    121     threadStructFields.push_back(mPipelineKernel->getHandle()->getType());
     131
     132    // NOTE: the following is a workaround for an LLVM bug for 32-bit VMs on 64-bit architectures
     133    DataLayout DL(b->getModule());
     134    Type * handleType = mPipelineKernel->getHandle()->getType();
     135    Type * handleIntType = DL.getIntPtrType(handleType);
     136
     137    threadStructFields.push_back(handleIntType);
    122138    threadStructFields.push_back(b->getSizeTy());
    123139    threadStructFields.push_back(getLocalStateType(b));
     
    137153}
    138154
     155/** ------------------------------------------------------------------------------------------------------------- *
     156 * @brief allocateThreadState
     157 ** ------------------------------------------------------------------------------------------------------------- */
     158inline AllocaInst * PipelineCompiler::allocateThreadState(BuilderRef b, const unsigned segOffset) {
     159
     160    StructType * const threadStructType = getThreadStateType(b);
     161    AllocaInst * const threadState = b->CreateCacheAlignedAlloca(threadStructType);
     162    b->CreateMemZero(threadState, ConstantExpr::getSizeOf(threadStructType), b->getCacheAlignment());
     163
     164    std::vector<Value *> indices(2);
     165    indices[0] = b->getInt32(0);
     166    indices[1] = b->getInt32(HANDLE_INDEX);
     167
     168    // NOTE: this is a workaround for an LLVM bug for 32-bit VMs on 64-bit architectures
     169    DataLayout DL(b->getModule());
     170    Type * handleType = mPipelineKernel->getHandle()->getType();
     171    Type * handleIntType = DL.getIntPtrType(handleType);
     172    Value * const handleInt = b->CreatePtrToInt(mPipelineKernel->getHandle(), handleIntType);
     173    b->CreateStore(handleInt, b->CreateGEP(threadState, indices));
     174    indices[1] = b->getInt32(SEGMENT_OFFSET_INDEX);
     175    b->CreateStore(b->getSize(segOffset), b->CreateGEP(threadState, indices));
     176    indices[1] = b->getInt32(LOCAL_STATE_INDEX);
     177    allocateThreadLocalState(b, b->CreateGEP(threadState, indices));
     178
     179    const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
     180    for (unsigned i = 0; i < numOfInputs; ++i) {
     181        auto buffer = mPipelineKernel->getInputStreamSetBuffer(i);
     182        Value * const handle = buffer->getHandle();
     183
     184        indices[1] = b->getInt32(i + FIRST_STREAM_INDEX);
     185
     186        b->CreateStore(handle, b->CreateGEP(threadState, indices));
     187    }
     188    const auto numOfOutputs = mPipelineKernel->getNumOfStreamOutputs();
     189    for (unsigned i = 0; i < numOfOutputs; ++i) {
     190        auto buffer = mPipelineKernel->getOutputStreamSetBuffer(i);
     191        Value * const handle = buffer->getHandle();
     192
     193        indices[1] = b->getInt32(i + numOfInputs + FIRST_STREAM_INDEX);
     194
     195        b->CreateStore(handle, b->CreateGEP(threadState, indices));
     196    }
     197
     198    return threadState;
     199}
     200
     201/** ------------------------------------------------------------------------------------------------------------- *
     202 * @brief setThreadState
     203 ** ------------------------------------------------------------------------------------------------------------- */
     204inline Value * PipelineCompiler::setThreadState(BuilderRef b, Value * threadState) {
     205
     206    std::vector<Value *> indices(2);
     207    indices[0] = b->getInt32(0);
     208    indices[1] = b->getInt32(HANDLE_INDEX);
     209
     210    // NOTE: this is a workaround for an LLVM bug for 32-bit VMs on 64-bit architectures
     211    Value * handleInt = b->CreateLoad(b->CreateGEP(threadState, indices));
     212    Type * handleType = mPipelineKernel->getHandle()->getType();
     213    Value * handle = b->CreateIntToPtr(handleInt, handleType);
     214    mPipelineKernel->setHandle(b, handle);
     215
     216    indices[1] = b->getInt32(SEGMENT_OFFSET_INDEX);
     217    Value * const segmentOffset = b->CreateLoad(b->CreateGEP(threadState, indices));
     218
     219    indices[1] = b->getInt32(LOCAL_STATE_INDEX);
     220
     221    setThreadLocalState(b, b->CreateGEP(threadState, indices));
     222    const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
     223    for (unsigned i = 0; i < numOfInputs; ++i) {
     224        indices[1] = b->getInt32(i + FIRST_STREAM_INDEX);
     225        Value * streamHandle = b->CreateLoad(b->CreateGEP(threadState, indices));
     226        auto buffer = mPipelineKernel->getInputStreamSetBuffer(i);
     227        buffer->setHandle(b, streamHandle);
     228    }
     229    const auto numOfOutputs = mPipelineKernel->getNumOfStreamOutputs();
     230    for (unsigned i = 0; i < numOfOutputs; ++i) {
     231        indices[1] = b->getInt32(i + numOfInputs + FIRST_STREAM_INDEX);
     232        Value * streamHandle = b->CreateLoad(b->CreateGEP(threadState, indices));
     233        auto buffer = mPipelineKernel->getOutputStreamSetBuffer(i);
     234        buffer->setHandle(b, streamHandle);
     235    }
     236
     237    return segmentOffset;
     238}
     239
     240/** ------------------------------------------------------------------------------------------------------------- *
     241 * @brief deallocateThreadState
     242 ** ------------------------------------------------------------------------------------------------------------- */
     243inline void PipelineCompiler::deallocateThreadState(BuilderRef b, Value * const threadState) {
     244    std::vector<Value *> indices(2);
     245    indices[0] = b->getInt32(0);
     246    indices[1] = b->getInt32(LOCAL_STATE_INDEX);
     247    deallocateThreadLocalState(b, b->CreateGEP(threadState, indices));
     248}
     249
    139250enum : int {
    140251    POP_COUNT_STRUCT_INDEX = 0
     
    142253
    143254/** ------------------------------------------------------------------------------------------------------------- *
    144  * @brief constructThreadState
    145  ** ------------------------------------------------------------------------------------------------------------- */
    146 inline AllocaInst * PipelineCompiler::allocateThreadState(BuilderRef b, const unsigned segOffset) {
    147 
    148     Constant * const ZERO = b->getInt32(0);
    149     Constant * const HANDLE = ZERO;
    150     Constant * const SEG_OFFSET = b->getInt32(1);
    151     Constant * const LOCAL_STATE = b->getInt32(2);
    152 
    153     StructType * const threadStructType = getThreadStateType(b);
    154     AllocaInst * const threadState = b->CreateAlloca(threadStructType);
    155     b->CreateStore(mPipelineKernel->getHandle(), b->CreateGEP(threadState, {ZERO, HANDLE}));
    156     b->CreateStore(b->getSize(segOffset), b->CreateGEP(threadState, {ZERO, SEG_OFFSET}));
    157     allocateThreadLocalState(b, b->CreateGEP(threadState, {ZERO, LOCAL_STATE}));
    158 
    159     const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
    160     for (unsigned i = 0; i < numOfInputs; ++i) {
    161         auto buffer = mPipelineKernel->getInputStreamSetBuffer(i);
    162         Value * const handle = buffer->getHandle();
    163         b->CreateStore(handle, b->CreateGEP(threadState, {ZERO, b->getInt32(i + 3)}));
    164     }
    165     const auto numOfOutputs = mPipelineKernel->getNumOfStreamOutputs();
    166     for (unsigned i = 0; i < numOfOutputs; ++i) {
    167         auto buffer = mPipelineKernel->getOutputStreamSetBuffer(i);
    168         Value * const handle = buffer->getHandle();
    169         b->CreateStore(handle, b->CreateGEP(threadState, {ZERO, b->getInt32(i + numOfInputs + 3)}));
    170     }
    171 
    172     return threadState;
    173 }
    174 
    175 /** ------------------------------------------------------------------------------------------------------------- *
    176  * @brief constructThreadState
    177  ** ------------------------------------------------------------------------------------------------------------- */
    178 inline Value * PipelineCompiler::setThreadState(BuilderRef b, Value * threadState) {
    179 
    180     Constant * const ZERO = b->getInt32(0);
    181     Constant * const HANDLE = ZERO;
    182     Constant * const SEG_OFFSET = b->getInt32(1);
    183     Constant * const LOCAL_STATE = b->getInt32(2);
    184 
    185     Value * handle = b->CreateLoad(b->CreateGEP(threadState, {ZERO, HANDLE}));
    186 
    187     mPipelineKernel->setHandle(b, handle);
    188     Value * const segmentOffset = b->CreateLoad(b->CreateGEP(threadState, {ZERO, SEG_OFFSET}));
    189     setThreadLocalState(b, b->CreateGEP(threadState, {ZERO, LOCAL_STATE}));
    190     const auto numOfInputs = mPipelineKernel->getNumOfStreamInputs();
    191     for (unsigned i = 0; i < numOfInputs; ++i) {
    192         Value * streamHandle = b->CreateLoad(b->CreateGEP(threadState, {ZERO, b->getInt32(i + 3)}));
    193         auto buffer = mPipelineKernel->getInputStreamSetBuffer(i);
    194         buffer->setHandle(b, streamHandle);
    195     }
    196     const auto numOfOutputs = mPipelineKernel->getNumOfStreamOutputs();
    197     for (unsigned i = 0; i < numOfOutputs; ++i) {
    198         Value * streamHandle = b->CreateLoad(b->CreateGEP(threadState, {ZERO, b->getInt32(i + numOfInputs + 3)}));
    199         auto buffer = mPipelineKernel->getOutputStreamSetBuffer(i);
    200         buffer->setHandle(b, streamHandle);
    201     }
    202 
    203     return segmentOffset;
    204 }
    205 
    206 /** ------------------------------------------------------------------------------------------------------------- *
    207  * @brief deallocateThreadLocalSpace
    208  ** ------------------------------------------------------------------------------------------------------------- */
    209 inline void PipelineCompiler::deallocateThreadState(BuilderRef b, Value * const threadState) {
    210     Constant * const ZERO = b->getInt32(0);
    211     Constant * const LOCAL_STATE = b->getInt32(2);
    212     deallocateThreadLocalState(b, b->CreateGEP(threadState, {ZERO, LOCAL_STATE}));
    213 }
    214 
    215 /** ------------------------------------------------------------------------------------------------------------- *
    216255 * @brief getLocalStateType
    217256 ** ------------------------------------------------------------------------------------------------------------- */
     
    225264 ** ------------------------------------------------------------------------------------------------------------- */
    226265inline void PipelineCompiler::allocateThreadLocalState(BuilderRef b, Value * const localState) {
    227     Constant * const ZERO = b->getInt32(0);
    228     Constant * const POP_COUNT_STRUCT = b->getInt32(POP_COUNT_STRUCT_INDEX);
    229     allocatePopCountArrays(b, b->CreateGEP(localState, {ZERO, POP_COUNT_STRUCT}));
     266    std::vector<Value *> indices(2);
     267    indices[0] = b->getInt32(0);
     268    indices[1] = b->getInt32(POP_COUNT_STRUCT_INDEX);
     269    allocatePopCountArrays(b, b->CreateGEP(localState, indices));
    230270}
    231271
     
    234274 ** ------------------------------------------------------------------------------------------------------------- */
    235275inline void PipelineCompiler::setThreadLocalState(BuilderRef b, Value * const localState) {
    236     Constant * const ZERO = b->getInt32(0);
    237     Constant * const POP_COUNT_STRUCT = b->getInt32(POP_COUNT_STRUCT_INDEX);
     276    std::vector<Value *> indices(2);
     277    indices[0] = b->getInt32(0);
     278    indices[1] = b->getInt32(POP_COUNT_STRUCT_INDEX);
    238279    assert (localState->getType()->getPointerElementType() == getLocalStateType(b));
    239     mPopCountState = b->CreateGEP(localState, {ZERO, POP_COUNT_STRUCT});
     280    mPopCountState = b->CreateGEP(localState, indices);
    240281    assert (mPopCountState->getType()->getPointerElementType() == getPopCountThreadLocalStateType(b));
    241282}
     
    245286 ** ------------------------------------------------------------------------------------------------------------- */
    246287inline void PipelineCompiler::deallocateThreadLocalState(BuilderRef b, Value * const localState) {
    247     Constant * const ZERO = b->getInt32(0);
    248     Constant * const POP_COUNT_STRUCT = b->getInt32(POP_COUNT_STRUCT_INDEX);
     288    std::vector<Value *> indices(2);
     289    indices[0] = b->getInt32(0);
     290    indices[1] = b->getInt32(POP_COUNT_STRUCT_INDEX);
    249291    assert (localState->getType()->getPointerElementType() == getLocalStateType(b));
    250     deallocatePopCountArrays(b, b->CreateGEP(localState, {ZERO, POP_COUNT_STRUCT}));
     292    deallocatePopCountArrays(b, b->CreateGEP(localState, indices));
    251293}
    252294
     
    254296
    255297#endif // PIPELINE_LOGIC_HPP
     298
Note: See TracChangeset for help on using the changeset viewer.