source: icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp @ 5242

Last change on this file since 5242 was 5242, checked in by cameron, 2 years ago

Add pthread create, join and exit support into CBuilder

File size: 9.1 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "pipeline.h"
7#include <toolchain.h>
8#include <IR_Gen/idisa_builder.h>
9#include <kernels/interface.h>
10#include <kernels/kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <iostream>
13
14using namespace kernel;
15
16Function * generateSegmentParallelPipelineThreadFunction(std::string name, IDISA::IDISA_Builder * iBuilder, std::vector<KernelBuilder *> kernels, Type * sharedStructType, int id) {
17
18    Module * m = iBuilder->getModule();
19    Type * const size_ty = iBuilder->getSizeTy();
20    Type * const voidTy = iBuilder->getVoidTy();
21    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
22    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
23
24    Function * const threadFunc = cast<Function>(m->getOrInsertFunction(name, voidTy, int8PtrTy, nullptr));
25    threadFunc->setCallingConv(CallingConv::C);
26    Function::arg_iterator args = threadFunc->arg_begin();
27
28    Value * const input = &*(args++);
29    input->setName("input");
30
31    unsigned threadNum = codegen::ThreadNum;
32
33     // Create the basic blocks for the thread function.
34    BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc, 0);
35    BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", threadFunc, 0);
36    BasicBlock * exitThreadBlock = BasicBlock::Create(iBuilder->getContext(), "exitThread", threadFunc, 0);
37    std::vector<BasicBlock *> segmentWait;
38    std::vector<BasicBlock *> segmentLoopBody;
39    for (unsigned i = 0; i < kernels.size(); i++) {
40        segmentWait.push_back(BasicBlock::Create(iBuilder->getContext(), "segmentWait"+std::to_string(i), threadFunc, 0));
41        segmentLoopBody.push_back(BasicBlock::Create(iBuilder->getContext(), "segmentWait"+std::to_string(i), threadFunc, 0));
42    }
43
44    iBuilder->SetInsertPoint(entryBlock);
45    Value * sharedStruct = iBuilder->CreateBitCast(input, PointerType::get(sharedStructType, 0));
46    Constant * myThreadId = ConstantInt::get(size_ty, id);
47    std::vector<Value *> instancePtrs;
48    for (unsigned i = 0; i < kernels.size(); i++) {
49        Value * ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
50        instancePtrs.push_back(iBuilder->CreateLoad(ptr));
51    }
52   
53    // Some important constant values.
54    int segmentSize = codegen::SegmentSize;
55    Constant * segmentBlocks = ConstantInt::get(size_ty, segmentSize);
56    iBuilder->CreateBr(segmentLoop);
57
58    iBuilder->SetInsertPoint(segmentLoop);
59    PHINode * segNo = iBuilder->CreatePHI(size_ty, 2, "segNo");
60    segNo->addIncoming(myThreadId, entryBlock);
61    unsigned last_kernel = kernels.size() - 1;
62    Value * alreadyDone = kernels[last_kernel]->getTerminationSignal(instancePtrs[last_kernel]);
63    iBuilder->CreateCondBr(alreadyDone, exitThreadBlock, segmentWait[0]);
64
65    for (unsigned i = 0; i < kernels.size(); i++) {
66        iBuilder->SetInsertPoint(segmentWait[i]);
67        Value * processedSegmentCount = kernels[i]->acquireLogicalSegmentNo(instancePtrs[i]);
68        Value * cond = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
69        iBuilder->CreateCondBr(cond, segmentLoopBody[i], segmentWait[i]);
70
71        iBuilder->SetInsertPoint(segmentLoopBody[i]);
72        kernels[i]->createDoSegmentCall(instancePtrs[i], segmentBlocks);
73        // Must be the last action, for synchronization.
74        kernels[i]->releaseLogicalSegmentNo(instancePtrs[i], iBuilder->CreateAdd(processedSegmentCount, iBuilder->getSize(1)));
75        if (i == last_kernel) break;
76        iBuilder->CreateBr(segmentWait[i+1]);
77    }
78   
79    segNo->addIncoming(iBuilder->CreateAdd(segNo, ConstantInt::get(size_ty, threadNum)), segmentLoopBody[last_kernel]);
80    Value * endSignal = kernels[last_kernel]->getTerminationSignal(instancePtrs[last_kernel]);
81    iBuilder->CreateCondBr(endSignal, exitThreadBlock, segmentLoop);
82   
83    iBuilder->SetInsertPoint(exitThreadBlock);
84    Value * nullVal = Constant::getNullValue(voidPtrTy);
85    iBuilder->CreatePThreadExitCall(nullVal);
86    iBuilder->CreateRetVoid();
87
88    return threadFunc;
89}
90
91void generateSegmentParallelPipeline(IDISA::IDISA_Builder * iBuilder, std::vector<KernelBuilder *> kernels) {
92   
93    unsigned threadNum = codegen::ThreadNum;
94
95    Module * m = iBuilder->getModule();
96
97    Type * const size_ty = iBuilder->getSizeTy();
98    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
99    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
100
101    for (auto k : kernels) k->createInstance();
102
103    Type * const pthreadsTy = ArrayType::get(size_ty, threadNum);
104    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
105    std::vector<Value *> pthreadsPtrs;
106    for (unsigned i = 0; i < threadNum; i++) {
107        pthreadsPtrs.push_back(iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(i)}));
108    }
109    Value * nullVal = Constant::getNullValue(voidPtrTy);
110    AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
111
112    std::vector<Type *> structTypes;
113    for (unsigned i = 0; i < kernels.size(); i++) {
114        structTypes.push_back(kernels[i]->getInstance()->getType());
115    }
116    Type * sharedStructType = StructType::get(m->getContext(), structTypes);
117
118    AllocaInst * sharedStruct = iBuilder->CreateAlloca(sharedStructType);
119    for (unsigned i = 0; i < kernels.size(); i++) {
120        Value * ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
121        iBuilder->CreateStore(kernels[i]->getInstance(), ptr);
122    }
123
124    std::vector<Function *> thread_functions;
125    const auto ip = iBuilder->saveIP();
126    for (unsigned i = 0; i < threadNum; i++) {
127        thread_functions.push_back(generateSegmentParallelPipelineThreadFunction("thread"+std::to_string(i), iBuilder, kernels, sharedStructType, i));
128    }
129    iBuilder->restoreIP(ip);
130
131    for (unsigned i = 0; i < threadNum; i++) {
132        iBuilder->CreatePThreadCreateCall(pthreadsPtrs[i], nullVal, thread_functions[i], iBuilder->CreateBitCast(sharedStruct, int8PtrTy));
133    }
134
135    std::vector<Value *> threadIDs;
136    for (unsigned i = 0; i < threadNum; i++) { 
137        threadIDs.push_back(iBuilder->CreateLoad(pthreadsPtrs[i]));
138    }
139   
140    for (unsigned i = 0; i < threadNum; i++) { 
141        iBuilder->CreatePThreadJoinCall(threadIDs[i], status);
142    }
143
144}
145
146void generatePipelineParallel(IDISA::IDISA_Builder * iBuilder, std::vector<KernelBuilder *> kernels) {
147 
148    Type * pthreadTy = iBuilder->getSizeTy();
149    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
150    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
151
152    Type * const pthreadsTy = ArrayType::get(pthreadTy, kernels.size());
153
154    for (auto k : kernels) k->createInstance();
155
156    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
157    std::vector<Value *> pthreadsPtrs;
158    for (unsigned i = 0; i < kernels.size(); i++) {
159        pthreadsPtrs.push_back(iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(i)}));
160    }
161    Value * nullVal = Constant::getNullValue(voidPtrTy);
162    AllocaInst * const status = iBuilder->CreateAlloca(int8PtrTy);
163
164    std::vector<Function *> kernel_functions;
165    const auto ip = iBuilder->saveIP();
166    for (unsigned i = 0; i < kernels.size(); i++) {
167        kernel_functions.push_back(kernels[i]->generateThreadFunction("k_"+std::to_string(i)));
168    }
169    iBuilder->restoreIP(ip);
170
171    for (unsigned i = 0; i < kernels.size(); i++) {
172        iBuilder->CreatePThreadCreateCall(pthreadsPtrs[i], nullVal, kernel_functions[i], iBuilder->CreateBitCast(kernels[i]->getInstance(), int8PtrTy));
173    }
174
175    std::vector<Value *> threadIDs;
176    for (unsigned i = 0; i < kernels.size(); i++) { 
177        threadIDs.push_back(iBuilder->CreateLoad(pthreadsPtrs[i]));
178    }
179   
180    for (unsigned i = 0; i < kernels.size(); i++) { 
181        iBuilder->CreatePThreadJoinCall(threadIDs[i], status);
182    }
183}
184
185
186void generatePipelineLoop(IDISA::IDISA_Builder * iBuilder, std::vector<KernelBuilder *> kernels) {
187   
188    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
189    Function * main = entryBlock->getParent();
190       
191    const unsigned segmentSize = codegen::SegmentSize;
192    Type * const size_ty = iBuilder->getSizeTy();
193
194    // Create the basic blocks for the loop.
195    BasicBlock * segmentBlock = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", main, 0);
196    BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exitBlock", main, 0);
197    for (auto k : kernels) k->createInstance();
198    iBuilder->CreateBr(segmentBlock);
199    iBuilder->SetInsertPoint(segmentBlock);
200    Constant * segBlocks = ConstantInt::get(size_ty, segmentSize * iBuilder->getStride() / iBuilder->getBitBlockWidth());
201    for (unsigned i = 0; i < kernels.size(); i++) {
202        kernels[i]->createDoSegmentCall(kernels[i]->getInstance(), segBlocks);
203        Value * segNo = kernels[i]->acquireLogicalSegmentNo(kernels[i]->getInstance());
204        kernels[i]->releaseLogicalSegmentNo(kernels[i]->getInstance(), iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
205    }
206    Value * endSignal = kernels.back()->getTerminationSignal(kernels.back()->getInstance());
207    iBuilder->CreateCondBr(endSignal, exitBlock, segmentBlock);
208    iBuilder->SetInsertPoint(exitBlock);
209
210}
Note: See TracBrowser for help on using the repository browser.