source: icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp @ 5431

Last change on this file since 5431 was 5374, checked in by cameron, 2 years ago

Unique names for IDISA builders

File size: 11.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "idisa_nvptx_builder.h"
8#include <llvm/IR/InlineAsm.h>
9#include <llvm/IR/Module.h>
10
11namespace IDISA {
12   
13std::string IDISA_NVPTX20_Builder::getBuilderUniqueName() { return "NVPTX20_" + std::to_string(groupThreads);}
14
15int IDISA_NVPTX20_Builder::getGroupThreads(){
16    return groupThreads;
17}
18
19Value * IDISA_NVPTX20_Builder::bitblock_any(Value * val) {
20    Type * const int32ty = getInt32Ty();
21    Function * barrierOrFunc = cast<Function>(mMod->getOrInsertFunction("llvm.nvvm.barrier0.or", int32ty, int32ty, nullptr));
22    Value * nonZero_i1 = CreateICmpUGT(val, ConstantInt::getNullValue(mBitBlockType));
23    Value * nonZero_i32 = CreateZExt(CreateBitCast(nonZero_i1, getInt1Ty()), int32ty);
24    Value * anyNonZero = CreateCall(barrierOrFunc, nonZero_i32);
25    return CreateICmpNE(anyNonZero,  ConstantInt::getNullValue(int32ty));
26}
27
28Value * IDISA_NVPTX20_Builder::bitblock_mask_from(Value * pos){
29    Type * const int64ty = getInt64Ty();
30    Value * id = CreateCall(tidFunc);
31    Value * id64 = CreateZExt(id, int64ty);
32    Value * threadSize = getInt64(groupThreads);
33    Value * fullBlocks = CreateUDiv(pos, threadSize);
34    Value * finalBlockSelect = CreateSExt(CreateICmpEQ(id64, fullBlocks), int64ty);
35    Value * finalBlockMask = CreateShl(getInt64(-1), CreateURem(pos, threadSize));
36    Value * unusedBlockMask = CreateSExt(CreateICmpUGT(id64, fullBlocks), int64ty);
37    return CreateBitCast(CreateOr(CreateAnd(finalBlockMask, finalBlockSelect), unusedBlockMask), mBitBlockType);
38}
39
40Value * IDISA_NVPTX20_Builder::bitblock_set_bit(Value * pos){
41    Type * const int64ty = getInt64Ty();
42    Value * id = CreateCall(tidFunc);
43    Value * id64 = CreateZExt(id, int64ty);
44    Value * threadSize = getInt64(groupThreads);
45    Value * fullBlocks = CreateUDiv(pos, threadSize);
46    Value * finalBlockSelect = CreateSExt(CreateICmpEQ(id64, fullBlocks), int64ty);
47    Value * finalBlockMask = CreateShl(getInt64(1), CreateURem(pos, threadSize));
48    return CreateBitCast(CreateAnd(finalBlockMask, finalBlockSelect), mBitBlockType);
49}
50   
51std::pair<Value *, Value *> IDISA_NVPTX20_Builder::bitblock_advance(Value * a, Value * shiftin, unsigned shift) {
52    Value * id = CreateCall(tidFunc);
53    Value * retVal = CreateCall(mLongAdvanceFunc, {id, a, CreateBitCast(getInt64(shift), mBitBlockType), shiftin});
54    Value * shifted = CreateExtractValue(retVal, {0});
55    Value * shiftOut = CreateExtractValue(retVal, {1});
56    return std::pair<Value *, Value *>(shiftOut, shifted);
57}
58
59std::pair<Value *, Value *> IDISA_NVPTX20_Builder::bitblock_add_with_carry(Value * a, Value * b, Value * carryIn) {
60    Value * id = CreateCall(tidFunc);
61    Value * retVal = CreateCall(mLongAddFunc, {id, a, b, carryIn});
62    Value * sum = CreateExtractValue(retVal, {0});
63    Value * carry_out_strm = CreateExtractValue(retVal, {1});
64    return std::pair<Value *, Value *>(carry_out_strm, sum);
65}
66
67void IDISA_NVPTX20_Builder::CreateGlobals(){
68
69    Type * const carryTy = ArrayType::get(mBitBlockType, groupThreads+1);
70    carry = new GlobalVariable(*mMod,
71        /*Type=*/carryTy,
72        /*isConstant=*/false,
73        /*Linkage=*/llvm::GlobalValue::InternalLinkage,
74        /*Initializer=*/0, 
75        /*Name=*/"carry",
76        /*InsertBefore*/nullptr,
77        /*TLMode */llvm::GlobalValue::NotThreadLocal,
78        /*AddressSpace*/ 3,
79        /*isExternallyInitialized*/false);
80
81    Type * const bubbleTy = ArrayType::get(mBitBlockType, groupThreads);
82
83    bubble = new GlobalVariable(*mMod,
84        /*Type=*/bubbleTy,
85        /*isConstant=*/false,
86        /*Linkage=*/llvm::GlobalValue::InternalLinkage,
87        /*Initializer=*/0, 
88        /*Name=*/"bubble",
89        /*InsertBefore*/nullptr,
90        /*TLMode */llvm::GlobalValue::NotThreadLocal,
91        /*AddressSpace*/ 3,
92        /*isExternallyInitialized*/false);
93   
94    ConstantAggregateZero* carryConstArray = ConstantAggregateZero::get(carryTy);
95    carry->setInitializer(carryConstArray);
96    ConstantAggregateZero* bubbleConstAray = ConstantAggregateZero::get(bubbleTy);
97    bubble->setInitializer(bubbleConstAray);
98
99}
100
101void IDISA_NVPTX20_Builder::CreateBuiltinFunctions(){
102    Type * const voidTy = getVoidTy();
103    Type * const int32ty = getInt32Ty();
104    barrierFunc = cast<Function>(mMod->getOrInsertFunction("llvm.nvvm.barrier0", voidTy, nullptr));
105    tidFunc = cast<Function>(mMod->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.tid.x", int32ty, nullptr));
106
107}
108
109void IDISA_NVPTX20_Builder::CreateLongAdvanceFunc(){
110  Type * const int32ty = getInt32Ty();
111  Type * returnType = StructType::get(mMod->getContext(), {mBitBlockType, mBitBlockType});
112
113  mLongAdvanceFunc = cast<Function>(mMod->getOrInsertFunction("LongAdvance", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
114  mLongAdvanceFunc->setCallingConv(CallingConv::C);
115  Function::arg_iterator args = mLongAdvanceFunc->arg_begin();
116
117  Value * const id = &*(args++);
118  id->setName("id");
119  Value * const val = &*(args++);
120  val->setName("val");
121  Value * const shftAmount = &*(args++);
122  shftAmount->setName("shftAmount");
123  Value * const blockCarry = &*(args++);
124  blockCarry->setName("blockCarry");
125
126  SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mLongAdvanceFunc,0));
127
128  Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
129  CreateStore(blockCarry, firstCarryPtr);
130
131  Value * adv0 = CreateShl(val, shftAmount);
132  Value * nextid = CreateAdd(id, getInt32(1));
133  Value * carryNextPtr = CreateGEP(carry, {getInt32(0), nextid});
134  Value * lshr0 = CreateLShr(val, CreateSub(CreateBitCast(getInt64(64), mBitBlockType), shftAmount));
135  CreateStore(lshr0, carryNextPtr);
136
137  CreateCall(barrierFunc);
138
139  Value * lastCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(groupThreads)});
140  Value * blockCarryOut = CreateLoad(lastCarryPtr, "blockCarryOut");
141
142  Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
143  Value * carryVal = CreateLoad(carryPtr, "carryVal");
144  Value * adv1 = CreateOr(adv0, carryVal);
145
146 
147  Value * retVal = UndefValue::get(returnType);
148  retVal = CreateInsertValue(retVal, adv1, 0);
149  retVal = CreateInsertValue(retVal, blockCarryOut, 1);
150  CreateRet(retVal);
151
152}
153
154                                           
155                                           
156void IDISA_NVPTX20_Builder::CreateLongAddFunc(){
157  Type * const int64ty = getInt64Ty();
158  Type * const int32ty = getInt32Ty();
159  Type * returnType = StructType::get(mMod->getContext(), {mBitBlockType, mBitBlockType});
160
161  mLongAddFunc = cast<Function>(mMod->getOrInsertFunction("LongAdd", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
162  mLongAddFunc->setCallingConv(CallingConv::C);
163  Function::arg_iterator args = mLongAddFunc->arg_begin();
164
165  Value * const id = &*(args++);
166  id->setName("id");
167  Value * const valA = &*(args++);
168  valA->setName("valA");
169  Value * const valB = &*(args++);
170  valB->setName("valB");
171  Value * const blockCarry = &*(args++);
172  blockCarry->setName("blockCarry");
173
174  BasicBlock * entryBlock = BasicBlock::Create(mMod->getContext(), "entry", mLongAddFunc, 0);
175  BasicBlock * bubbleCalculateBlock = BasicBlock::Create(mMod->getContext(), "bubbleCalculate", mLongAddFunc, 0);
176  BasicBlock * bubbleSetBlock = BasicBlock::Create(mMod->getContext(), "bubbleSet", mLongAddFunc, 0);
177
178  SetInsertPoint(entryBlock);
179
180  Value * id64 = CreateZExt(id, int64ty);
181
182  Value * partial_sum = CreateAdd(valA, valB);
183  Value * gen = CreateAnd(valA, valB);
184  Value * prop = CreateXor(valA, valB);
185
186  Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
187  Value * carryInitVal = CreateAnd(CreateOr(gen, CreateAnd(prop, CreateNot(partial_sum))), CreateBitCast(getInt64(0x8000000000000000), mBitBlockType));
188  carryInitVal = CreateLShr(carryInitVal, CreateBitCast(CreateSub(getInt64(63), id64), mBitBlockType));
189  CreateStore(carryInitVal, carryPtr);
190
191  Value * bubbleCond = CreateICmpEQ(CreateAdd(CreateBitCast(partial_sum, int64ty), getInt64(1)), getInt64(0));
192  CreateCondBr(bubbleCond, bubbleCalculateBlock, bubbleSetBlock);
193
194  SetInsertPoint(bubbleCalculateBlock);
195  Value * calcBubble = CreateBitCast(CreateShl(getInt64(1), id64), mBitBlockType);
196  CreateBr(bubbleSetBlock);
197
198  SetInsertPoint(bubbleSetBlock);
199  PHINode * bubbleInitVal = CreatePHI(mBitBlockType, 2, "bubbleInitVal");
200  bubbleInitVal->addIncoming(CreateBitCast(getInt64(0), mBitBlockType), entryBlock);
201  bubbleInitVal->addIncoming(calcBubble, bubbleCalculateBlock);
202
203  Value * bubblePtr = CreateGEP(bubble, {getInt32(0), id});
204  CreateStore(bubbleInitVal, bubblePtr);
205
206  CreateCall(barrierFunc);
207
208  Value * carryOffsetPtr = nullptr;
209  Value * carryVal = carryInitVal;
210  Value * bubbleOffsetPtr = nullptr;
211  Value * bubbleVal = bubbleInitVal;
212
213  for (int offset=groupThreads/2; offset>0; offset=offset>>1){
214    carryOffsetPtr = CreateGEP(carry, {getInt32(0), CreateXor(id, getInt32(offset))});
215    carryVal = CreateOr(carryVal, CreateLoad(carryOffsetPtr));
216    CreateStore(carryVal, carryPtr);
217    bubbleOffsetPtr = CreateGEP(bubble, {getInt32(0), CreateXor(id, getInt32(offset))});
218    bubbleVal = CreateOr(bubbleVal, CreateLoad(bubbleOffsetPtr));
219    CreateStore(bubbleVal, bubblePtr);
220    CreateCall(barrierFunc);
221  }
222
223  Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
224  Value * carryVal0 = CreateLoad(firstCarryPtr, "carry0");
225  Value * carry_mask = CreateOr(CreateShl(carryVal0, 1), blockCarry);
226  Value * firstBubblePtr = CreateGEP(bubble, {getInt32(0), getInt32(0)});
227  Value * bubble_mask = CreateLoad(firstBubblePtr, "bubble_mask");
228
229  Value * s = CreateAnd(CreateAdd(carry_mask, bubble_mask), CreateNot(bubble_mask));
230  Value * inc = CreateOr(s, CreateSub(s, carry_mask));
231  Value * rslt = CreateAdd(partial_sum, CreateAnd(CreateLShr(inc, CreateBitCast(id64, mBitBlockType)), CreateBitCast(getInt64(1), mBitBlockType)));
232
233  Value * blockCarryOut = CreateLShr(CreateOr(carryVal0, CreateAnd(bubble_mask, inc)), 63);
234
235  Value * retVal = UndefValue::get(returnType);
236  retVal = CreateInsertValue(retVal, rslt, 0);
237  retVal = CreateInsertValue(retVal, blockCarryOut, 1);
238  CreateRet(retVal);
239
240}
241
242void IDISA_NVPTX20_Builder::CreateBallotFunc(){
243    Type * const int32ty = getInt32Ty();
244    Type * const int1ty = getInt1Ty();
245    Function * const ballotFn = cast<Function>(mMod->getOrInsertFunction("ballot_nvptx", int32ty, int1ty, nullptr));
246    ballotFn->setCallingConv(CallingConv::C);
247    Function::arg_iterator args = ballotFn->arg_begin();
248
249    Value * const input = &*(args++);
250    input->setName("input");
251
252    SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", ballotFn, 0));
253
254    Value * conv = CreateZExt(input, int32ty);
255
256    const char * AsmStream = "{.reg .pred %p1;"
257                             "setp.ne.u32 %p1, $1, 0;"
258                             "vote.ballot.b32  $0, %p1;}";
259    FunctionType * AsmFnTy = FunctionType::get(int32ty, int32ty, false);
260    llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFnTy, AsmStream, "=r,r", true, false);
261    llvm::CallInst * result = CreateCall(IA, conv);
262    result->addAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::NoUnwind);
263
264    CreateRet(result);
265}
266
267LoadInst * IDISA_NVPTX20_Builder::CreateAtomicLoadAcquire(Value * ptr) {
268    return CreateLoad(ptr);
269   
270}
271StoreInst * IDISA_NVPTX20_Builder::CreateAtomicStoreRelease(Value * val, Value * ptr) {
272    return CreateStore(val, ptr);
273}
274
275   
276}
Note: See TracBrowser for help on using the repository browser.