source: icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp @ 5224

Last change on this file since 5224 was 5224, checked in by cameron, 3 years ago

Move responsibility for acquire/release of logical segment number into pipeline compilers.

File size: 9.8 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6
7#include <toolchain.h>
8#include "pipeline.h"
9
10#include <IDISA/idisa_builder.h>
11
12#include <kernels/interface.h>
13#include <kernels/kernel.h>
14#include <kernels/s2p_kernel.h>
15
16#include <llvm/IR/TypeBuilder.h>
17#include <iostream>
18
19using namespace kernel;
20
21Function * generateSegmentParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, std::vector<KernelBuilder *> kernels, Type * sharedStructType, int id) {
22
23    Module * m = iBuilder->getModule();
24    Type * const size_ty = iBuilder->getSizeTy();
25    Type * const voidTy = Type::getVoidTy(m->getContext());
26    Type * const voidPtrTy = TypeBuilder<void *, false>::get(m->getContext());
27    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
28
29    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
30    threadFunc->setCallingConv(CallingConv::C);
31    Function::arg_iterator args = threadFunc->arg_begin();
32
33    Value * const input = &*(args++);
34    input->setName("input");
35
36    unsigned threadNum = codegen::ThreadNum;
37
38     // Create the basic blocks for the thread function.
39    BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc, 0);
40    BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", threadFunc, 0);
41    BasicBlock * exitThreadBlock = BasicBlock::Create(iBuilder->getContext(), "exitThread", threadFunc, 0);
42    std::vector<BasicBlock *> segmentWait;
43    std::vector<BasicBlock *> segmentLoopBody;
44    for (unsigned i = 0; i < kernels.size(); i++) {
45        segmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), "segmentWait"+std::to_string(i), threadFunc, 0));
46        segmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), "segmentWait"+std::to_string(i), threadFunc, 0));
47    }
48
49    iBuilder->SetInsertPoint(entryBlock);
50    Value * sharedStruct = iBuilder->CreateBitCast(input, PointerType::get(sharedStructType, 0));
51    Constant * myThreadId = ConstantInt::get(size_ty, id);
52    std::vector<Value *> instancePtrs;
53    for (unsigned i = 0; i < kernels.size(); i++) {
54        Value * ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
55        instancePtrs.push_back(iBuilder->CreateLoad(ptr));
56    }
57   
58    // Some important constant values.
59    int segmentSize = codegen::SegmentSize;
60    Constant * segmentBlocks = ConstantInt::get(size_ty, segmentSize);
61    iBuilder->CreateBr(segmentLoop);
62
63    iBuilder->SetInsertPoint(segmentLoop);
64    PHINode * segNo = iBuilder->CreatePHI(size_ty, 2, "segNo");
65    segNo->addIncoming(myThreadId, entryBlock);
66    unsigned last_kernel = kernels.size() - 1;
67    Value * alreadyDone = kernels[last_kernel]->getTerminationSignal(instancePtrs[last_kernel]);
68    iBuilder->CreateCondBr(alreadyDone, exitThreadBlock, segmentWait[0]);
69
70    for (unsigned i = 0; i < kernels.size(); i++) {
71        iBuilder->SetInsertPoint(segmentWait[i]);
72        Value * processedSegmentCount = kernels[i]->acquireLogicalSegmentNo(instancePtrs[i]);
73        Value * cond = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
74        iBuilder->CreateCondBr(cond, segmentLoopBody[i], segmentWait[i]);
75
76        iBuilder->SetInsertPoint(segmentLoopBody[i]);
77        kernels[i]->createDoSegmentCall(instancePtrs[i], segmentBlocks);
78        // Must be the last action, for synchronization.
79        kernels[i]->releaseLogicalSegmentNo(instancePtrs[i], iBuilder->CreateAdd(processedSegmentCount, ConstantInt::get(iBuilder->getSizeTy(), 1)));
80        if (i == last_kernel) break;
81        iBuilder->CreateBr(segmentWait[i+1]);
82    }
83   
84    segNo->addIncoming(iBuilder->CreateAdd(segNo, ConstantInt::get(size_ty, threadNum)), segmentLoopBody[last_kernel]);
85    Value * endSignal = kernels[last_kernel]->getTerminationSignal(instancePtrs[last_kernel]);
86    iBuilder->CreateCondBr(endSignal, exitThreadBlock, segmentLoop);
87   
88    iBuilder->SetInsertPoint(exitThreadBlock);
89    Value * nullVal = Constant::getNullValue(voidPtrTy);
90    Function * pthreadExitFunc = m->getFunction("pthread_exit");
91    CallInst * exitThread = iBuilder->CreateCall(pthreadExitFunc, {nullVal});
92    exitThread->setDoesNotReturn();
93    iBuilder->CreateRetVoid();
94
95    return threadFunc;
96}
97
98void generateSegmentParallelPipeline(IDISA::IDISA_Builder * iBuilder, std::vector<KernelBuilder *> kernels) {
99   
100    unsigned threadNum = codegen::ThreadNum;
101
102    Module * m = iBuilder->getModule();
103
104    Type * const size_ty = iBuilder->getSizeTy();
105    Type * const voidPtrTy = TypeBuilder<void *, false>::get(m->getContext());
106    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
107
108    for (auto k : kernels) k->createInstance();
109
110    Type * const pthreadsTy = ArrayType::get(size_ty, threadNum);
111    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
112    std::vector<Value *> pthreadsPtrs;
113    for (unsigned i = 0; i < threadNum; i++) {
114        pthreadsPtrs.push_back(iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(i)}));
115    }
116    Value * nullVal = Constant::getNullValue(voidPtrTy);
117    AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
118
119    std::vector<Type *> structTypes;
120    for (unsigned i = 0; i < kernels.size(); i++) {
121        structTypes.push_back(kernels[i]->getInstance()->getType());
122    }
123    Type * sharedStructType = StructType::get(m->getContext(), structTypes);
124
125    AllocaInst * sharedStruct = iBuilder->CreateAlloca(sharedStructType);
126    for (unsigned i = 0; i < kernels.size(); i++) {
127        Value * ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
128        iBuilder->CreateStore(kernels[i]->getInstance(), ptr);
129    }
130
131    std::vector<Function *> thread_functions;
132    const auto ip = iBuilder->saveIP();
133    for (unsigned i = 0; i < threadNum; i++) {
134        thread_functions.push_back(generateSegmentParallelPipelineThreadFunction("thread"+std::to_string(i), iBuilder, kernels, sharedStructType, i));
135    }
136    iBuilder->restoreIP(ip);
137
138    Function * pthreadCreateFunc = m->getFunction("pthread_create");
139    Function * pthreadJoinFunc = m->getFunction("pthread_join");
140
141    for (unsigned i = 0; i < threadNum; i++) {
142        iBuilder->CreateCall(pthreadCreateFunc, std::vector<Value *>({pthreadsPtrs[i], nullVal, thread_functions[i], iBuilder->CreateBitCast(sharedStruct, int8PtrTy)}));
143    }
144
145    std::vector<Value *> threadIDs;
146    for (unsigned i = 0; i < threadNum; i++) { 
147        threadIDs.push_back(iBuilder->CreateLoad(pthreadsPtrs[i]));
148    }
149   
150    for (unsigned i = 0; i < threadNum; i++) { 
151        iBuilder->CreateCall(pthreadJoinFunc, std::vector<Value *>({threadIDs[i], status}));
152    }
153
154}
155
156void generatePipelineParallel(IDISA::IDISA_Builder * iBuilder, std::vector<KernelBuilder *> kernels) {
157 
158    Module * m = iBuilder->getModule();
159
160    Type * pthreadTy = iBuilder->getSizeTy();     
161    Type * const voidPtrTy = TypeBuilder<void *, false>::get(m->getContext());
162    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
163
164    Type * const pthreadsTy = ArrayType::get(pthreadTy, kernels.size());
165
166    for (auto k : kernels) k->createInstance();
167
168    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
169    std::vector<Value *> pthreadsPtrs;
170    for (unsigned i = 0; i < kernels.size(); i++) {
171        pthreadsPtrs.push_back(iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(i)}));
172    }
173    Value * nullVal = Constant::getNullValue(voidPtrTy);
174    AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
175
176    std::vector<Function *> kernel_functions;
177    const auto ip = iBuilder->saveIP();
178    for (unsigned i = 0; i < kernels.size(); i++) {
179        kernel_functions.push_back(kernels[i]->generateThreadFunction("k_"+std::to_string(i)));
180    }
181    iBuilder->restoreIP(ip);
182
183    Function * pthreadCreateFunc = m->getFunction("pthread_create");
184    Function * pthreadJoinFunc = m->getFunction("pthread_join");
185
186    for (unsigned i = 0; i < kernels.size(); i++) {
187        iBuilder->CreateCall(pthreadCreateFunc, std::vector<Value *>({pthreadsPtrs[i], nullVal, kernel_functions[i], iBuilder->CreateBitCast(kernels[i]->getInstance(), int8PtrTy)}));
188    }
189
190    std::vector<Value *> threadIDs;
191    for (unsigned i = 0; i < kernels.size(); i++) { 
192        threadIDs.push_back(iBuilder->CreateLoad(pthreadsPtrs[i]));
193    }
194   
195    for (unsigned i = 0; i < kernels.size(); i++) { 
196        iBuilder->CreateCall(pthreadJoinFunc, std::vector<Value *>({threadIDs[i], status}));
197    }
198}
199
200
201void generatePipelineLoop(IDISA::IDISA_Builder * iBuilder, std::vector<KernelBuilder *> kernels) {
202   
203    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
204    Function * main = entryBlock->getParent();
205       
206    const unsigned segmentSize = codegen::SegmentSize;
207    Type * const size_ty = iBuilder->getSizeTy();
208
209    // Create the basic blocks for the loop.
210    BasicBlock * segmentBlock = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", main, 0);
211    BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exitBlock", main, 0);
212    for (auto k : kernels) k->createInstance();
213    iBuilder->CreateBr(segmentBlock);
214    iBuilder->SetInsertPoint(segmentBlock);
215    Constant * segBlocks = ConstantInt::get(size_ty, segmentSize * iBuilder->getStride() / iBuilder->getBitBlockWidth());
216    for (unsigned i = 0; i < kernels.size(); i++) {
217        kernels[i]->createDoSegmentCall(kernels[i]->getInstance(), segBlocks);
218        Value * segNo = kernels[i]->acquireLogicalSegmentNo(kernels[i]->getInstance());
219        kernels[i]->releaseLogicalSegmentNo(kernels[i]->getInstance(), iBuilder->CreateAdd(segNo, ConstantInt::get(iBuilder->getSizeTy(), 1)));
220    }
221    Value * endSignal = kernels.back()->getTerminationSignal(kernels.back()->getInstance());
222    iBuilder->CreateCondBr(endSignal, exitBlock, segmentBlock);
223    iBuilder->SetInsertPoint(exitBlock);
224
225}
Note: See TracBrowser for help on using the repository browser.