source: icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_logic.hpp @ 6233

Last change on this file since 6233 was 6233, checked in by nmedfort, 4 months ago

Moved termination signals into pipeline kernel

File size: 8.1 KB
Line 
1#ifndef PIPELINE_LOGIC_HPP
2#define PIPELINE_LOGIC_HPP
3
4#include "pipeline_compiler.hpp"
5
6namespace kernel {
7
8/** ------------------------------------------------------------------------------------------------------------- *
9 * @brief compileSingleThread
10 ** ------------------------------------------------------------------------------------------------------------- */
11void PipelineCompiler::generateSingleThreadKernelMethod(BuilderRef b) {
12    Value * const localState = allocateThreadLocalSpace(b);
13    setThreadLocalSpace(b, localState);
14    start(b, b->getSize(0));
15    for (unsigned i = 0; i < mPipeline.size(); ++i) {
16        setActiveKernel(b, i);
17        executeKernel(b);
18    }
19    end(b, 1);
20    deallocateThreadLocalSpace(b, localState);
21}
22
23/** ------------------------------------------------------------------------------------------------------------- *
24 * @brief compileMultiThread
25 *
26 * Given a computation expressed as a logical pipeline of K kernels k0, k_1, ...k_(K-1)
27 * operating over an input stream set S, a segment-parallel implementation divides the input
28 * into segments and coordinates a set of T <= K threads to each process one segment at a time.
29 * Let S_0, S_1, ... S_N be the segments of S.   Segments are assigned to threads in a round-robin
30 * fashion such that processing of segment S_i by the full pipeline is carried out by thread i mod T.
31 ** ------------------------------------------------------------------------------------------------------------- */
32void PipelineCompiler::generateMultiThreadKernelMethod(BuilderRef b, const unsigned numOfThreads) {
33
34    assert (numOfThreads > 1);
35
36    Module * const m = b->getModule();
37    IntegerType * const sizeTy = b->getSizeTy();
38    PointerType * const voidPtrTy = b->getVoidPtrTy();
39
40    ConstantInt * const ZERO = b->getInt32(0);
41    ConstantInt * const ONE = b->getInt32(1);
42    ConstantInt * const TWO = b->getInt32(2);
43
44    // store where we'll resume compiling the DoSegment method
45    const auto resumePoint = b->saveIP();
46    Value * const handle = mPipelineKernel->getHandle(); assert (handle);
47    StructType * const threadStructType = StructType::get(m->getContext(), {handle->getType(), sizeTy, voidPtrTy});
48    FunctionType * const threadFuncType = FunctionType::get(voidPtrTy, {voidPtrTy}, false);
49
50    const auto threadName = mPipelineKernel->getName() + "_DoSegmentThread";
51    Function * const threadFunc = Function::Create(threadFuncType, Function::InternalLinkage, threadName, b->getModule());
52    threadFunc->setCallingConv(CallingConv::C);
53    auto args = threadFunc->arg_begin();
54    args->setName("kernelStateObject");
55
56    // -------------------------------------------------------------------------------------------------------------------------
57    // MAKE PIPELINE THREAD
58    // -------------------------------------------------------------------------------------------------------------------------
59    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", threadFunc));
60    Value * const threadStruct = b->CreateBitCast(&*(args), threadStructType->getPointerTo());
61    mPipelineKernel->setHandle(b, b->CreateLoad(b->CreateGEP(threadStruct, {ZERO, ZERO})));
62    Value * const segmentOffset = b->CreateLoad(b->CreateGEP(threadStruct, {ZERO, ONE}));
63    setThreadLocalSpace(b, b->CreateLoad(b->CreateGEP(threadStruct, {ZERO, TWO})));
64    // generate the pipeline logic for this thread
65    start(b, segmentOffset);
66    for (unsigned i = 0; i < mPipeline.size(); ++i) {
67        setActiveKernel(b, i);
68        synchronize(b);
69        executeKernel(b);
70    }
71    mKernel = nullptr;
72    mKernelIndex = 0;
73    end(b, numOfThreads);
74    // only call pthread_exit() within spawned threads; otherwise it'll be equivalent to calling exit() within the process
75    BasicBlock * const exitThread = b->CreateBasicBlock("ExitThread");
76    BasicBlock * const exitFunction = b->CreateBasicBlock("ExitProcessFunction");
77    b->CreateCondBr(b->CreateIsNull(segmentOffset), exitFunction, exitThread);
78    b->SetInsertPoint(exitThread);
79    Constant * const nullVoidPtrVal = ConstantPointerNull::getNullValue(voidPtrTy);
80    b->CreatePThreadExitCall(nullVoidPtrVal);
81    b->CreateBr(exitFunction);
82    b->SetInsertPoint(exitFunction);
83    b->CreateRet(nullVoidPtrVal);
84
85    // -------------------------------------------------------------------------------------------------------------------------
86    // MAKE PIPELINE DRIVER
87    // -------------------------------------------------------------------------------------------------------------------------
88    b->restoreIP(resumePoint);
89    mPipelineKernel->setHandle(b, handle);
90    const unsigned threads = numOfThreads - 1;
91    Type * const pthreadsTy = ArrayType::get(sizeTy, threads);
92    AllocaInst * const pthreads = b->CreateAlloca(pthreadsTy);
93    Value * threadIdPtr[threads];
94    for (unsigned i = 0; i < threads; ++i) {
95        threadIdPtr[i] = b->CreateGEP(pthreads, {ZERO, b->getInt32(i)});
96    }
97    // use the process thread to handle the initial segment function after spawning
98    // (n - 1) threads to handle the subsequent offsets
99    Value * localState[threads];
100    for (unsigned i = 0; i < threads; ++i) {
101        AllocaInst * const threadState = b->CreateAlloca(threadStructType);
102        b->CreateStore(handle, b->CreateGEP(threadState, {ZERO, ZERO}));
103        b->CreateStore(b->getSize(i + 1), b->CreateGEP(threadState, {ZERO, ONE}));
104        localState[i] = allocateThreadLocalSpace(b);
105        b->CreateStore(localState[i], b->CreateGEP(threadState, {ZERO, TWO}));
106        b->CreatePThreadCreateCall(threadIdPtr[i], nullVoidPtrVal, threadFunc, threadState);
107    }
108
109    AllocaInst * const threadState = b->CreateAlloca(threadStructType);
110    b->CreateStore(handle, b->CreateGEP(threadState, {ZERO, ZERO}));
111    b->CreateStore(b->getSize(0), b->CreateGEP(threadState, {ZERO, ONE}));
112    b->CreateCall(threadFunc, b->CreatePointerCast(threadState, voidPtrTy));
113
114    AllocaInst * const status = b->CreateAlloca(voidPtrTy);
115    for (unsigned i = 0; i < threads; ++i) {
116        Value * threadId = b->CreateLoad(threadIdPtr[i]);
117        b->CreatePThreadJoinCall(threadId, status);
118        deallocateThreadLocalSpace(b, localState[i]);
119    }
120
121
122}
123
// Field index of the pop count state inside the thread-local state struct.
enum : int { POP_COUNT_STRUCT_INDEX = 0 };
127
128
129/** ------------------------------------------------------------------------------------------------------------- *
130 * @brief allocateThreadLocalSpace
131 ** ------------------------------------------------------------------------------------------------------------- */
132inline Value * PipelineCompiler::allocateThreadLocalSpace(BuilderRef b) {
133    // malloc the local state object
134    StructType * const popCountTy = getPopCountThreadLocalStateType(b);
135    StructType * const localStateTy = StructType::get(popCountTy, nullptr);
136    Value * const localState = b->CreateCacheAlignedAlloca(localStateTy);
137    // and any pop count refs
138    Constant * const ZERO = b->getInt32(0);
139    Constant * const POP_COUNT_STRUCT = b->getInt32(POP_COUNT_STRUCT_INDEX);
140    allocatePopCountArrays(b, b->CreateGEP(localState, {ZERO, POP_COUNT_STRUCT}));
141    return localState;
142}
143
144/** ------------------------------------------------------------------------------------------------------------- *
145 * @brief setThreadLocalSpace
146 ** ------------------------------------------------------------------------------------------------------------- */
147inline void PipelineCompiler::setThreadLocalSpace(BuilderRef b, Value * const localState) {
148    Constant * const ZERO = b->getInt32(0);
149    Constant * const POP_COUNT_STRUCT = b->getInt32(POP_COUNT_STRUCT_INDEX);
150    mPopCountState = b->CreateGEP(localState, {ZERO, POP_COUNT_STRUCT});
151}
152
153/** ------------------------------------------------------------------------------------------------------------- *
154 * @brief deallocateThreadLocalSpace
155 ** ------------------------------------------------------------------------------------------------------------- */
156inline void PipelineCompiler::deallocateThreadLocalSpace(BuilderRef b, Value * const localState) {
157    Constant * const ZERO = b->getInt32(0);
158    Constant * const POP_COUNT_STRUCT = b->getInt32(POP_COUNT_STRUCT_INDEX);
159    deallocatePopCountArrays(b, b->CreateGEP(localState, {ZERO, POP_COUNT_STRUCT}));
160}
161
162}
163
164#endif // PIPELINE_LOGIC_HPP
Note: See TracBrowser for help on using the repository browser.