source: icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp @ 5292

Last change on this file since 5292 was 5292, checked in by nmedfort, 2 years ago

Removed 'function' and 'self' parameters from generateXXXMethod() functions.

File size: 11.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "idisa_nvptx_builder.h"
8#include <llvm/IR/IRBuilder.h>
9#include <llvm/IR/Constants.h>
10#include <llvm/IR/Intrinsics.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/InlineAsm.h>
13#include <llvm/IR/Module.h>
14
15namespace IDISA {
16
17int IDISA_NVPTX20_Builder::getGroupThreads(){
18    return groupThreads;
19}
20
21Value * IDISA_NVPTX20_Builder::bitblock_any(Value * val) {
22    Type * const int32ty = getInt32Ty();
23    Function * barrierOrFunc = cast<Function>(mMod->getOrInsertFunction("llvm.nvvm.barrier0.or", int32ty, int32ty, nullptr));
24    Value * nonZero_i1 = CreateICmpUGT(val, ConstantInt::get(mBitBlockType, 0));
25    Value * nonZero_i32 = CreateZExt(CreateBitCast(nonZero_i1, getInt1Ty()), int32ty);
26    Value * anyNonZero = CreateCall(barrierOrFunc, nonZero_i32);
27    return CreateICmpNE(anyNonZero,  ConstantInt::get(int32ty, 0));
28}
29
30Value * IDISA_NVPTX20_Builder::bitblock_mask_from(Value * pos){
31    Type * const int64ty = getInt64Ty();
32    Value * id = CreateCall(tidFunc);
33    Value * id64 = CreateZExt(id, int64ty);
34    Value * threadSize = getInt64(groupThreads);
35    Value * fullBlocks = CreateUDiv(pos, threadSize);
36    Value * finalBlockSelect = CreateSExt(CreateICmpEQ(id64, fullBlocks), int64ty);
37    Value * finalBlockMask = CreateShl(getInt64(-1), CreateURem(pos, threadSize));
38    Value * unusedBlockMask = CreateSExt(CreateICmpUGT(id64, fullBlocks), int64ty);
39    return CreateBitCast(CreateOr(CreateAnd(finalBlockMask, finalBlockSelect), unusedBlockMask), mBitBlockType);
40}
41
42Value * IDISA_NVPTX20_Builder::bitblock_set_bit(Value * pos){
43    Type * const int64ty = getInt64Ty();
44    Value * id = CreateCall(tidFunc);
45    Value * id64 = CreateZExt(id, int64ty);
46    Value * threadSize = getInt64(groupThreads);
47    Value * fullBlocks = CreateUDiv(pos, threadSize);
48    Value * finalBlockSelect = CreateSExt(CreateICmpEQ(id64, fullBlocks), int64ty);
49    Value * finalBlockMask = CreateShl(getInt64(1), CreateURem(pos, threadSize));
50    return CreateBitCast(CreateAnd(finalBlockMask, finalBlockSelect), mBitBlockType);
51}
52   
53std::pair<Value *, Value *> IDISA_NVPTX20_Builder::bitblock_advance(Value * a, Value * shiftin, unsigned shift) {
54    Value * id = CreateCall(tidFunc);
55    Value * retVal = CreateCall(mLongAdvanceFunc, {id, a, CreateBitCast(getInt64(shift), mBitBlockType), shiftin});
56    Value * shifted = CreateExtractValue(retVal, {0});
57    Value * shiftOut = CreateExtractValue(retVal, {1});
58    return std::pair<Value *, Value *>(shiftOut, shifted);
59}
60
61std::pair<Value *, Value *> IDISA_NVPTX20_Builder::bitblock_add_with_carry(Value * a, Value * b, Value * carryIn) {
62    Value * id = CreateCall(tidFunc);
63    Value * retVal = CreateCall(mLongAddFunc, {id, a, b, carryIn});
64    Value * sum = CreateExtractValue(retVal, {0});
65    Value * carry_out_strm = CreateExtractValue(retVal, {1});
66    return std::pair<Value *, Value *>(carry_out_strm, sum);
67}
68
69void IDISA_NVPTX20_Builder::CreateGlobals(){
70
71    Type * const carryTy = ArrayType::get(mBitBlockType, groupThreads+1);
72    carry = new GlobalVariable(*mMod,
73        /*Type=*/carryTy,
74        /*isConstant=*/false,
75        /*Linkage=*/llvm::GlobalValue::InternalLinkage,
76        /*Initializer=*/0, 
77        /*Name=*/"carry",
78        /*InsertBefore*/nullptr,
79        /*TLMode */llvm::GlobalValue::NotThreadLocal,
80        /*AddressSpace*/ 3,
81        /*isExternallyInitialized*/false);
82
83    Type * const bubbleTy = ArrayType::get(mBitBlockType, groupThreads);
84
85    bubble = new GlobalVariable(*mMod,
86        /*Type=*/bubbleTy,
87        /*isConstant=*/false,
88        /*Linkage=*/llvm::GlobalValue::InternalLinkage,
89        /*Initializer=*/0, 
90        /*Name=*/"bubble",
91        /*InsertBefore*/nullptr,
92        /*TLMode */llvm::GlobalValue::NotThreadLocal,
93        /*AddressSpace*/ 3,
94        /*isExternallyInitialized*/false);
95   
96    ConstantAggregateZero* carryConstArray = ConstantAggregateZero::get(carryTy);
97    carry->setInitializer(carryConstArray);
98    ConstantAggregateZero* bubbleConstAray = ConstantAggregateZero::get(bubbleTy);
99    bubble->setInitializer(bubbleConstAray);
100
101}
102
103void IDISA_NVPTX20_Builder::CreateBuiltinFunctions(){   
104    Type * const voidTy = getVoidTy();
105    Type * const int32ty = getInt32Ty();
106    barrierFunc = cast<Function>(mMod->getOrInsertFunction("llvm.nvvm.barrier0", voidTy, nullptr));
107    tidFunc = cast<Function>(mMod->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.tid.x", int32ty, nullptr));
108
109}
110
111void IDISA_NVPTX20_Builder::CreateLongAdvanceFunc(){
112  Type * const int32ty = getInt32Ty();
113  Type * returnType = StructType::get(mMod->getContext(), {mBitBlockType, mBitBlockType});
114
115  mLongAdvanceFunc = cast<Function>(mMod->getOrInsertFunction("LongAdvance", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
116  mLongAdvanceFunc->setCallingConv(CallingConv::C);
117  Function::arg_iterator args = mLongAdvanceFunc->arg_begin();
118
119  Value * const id = &*(args++);
120  id->setName("id");
121  Value * const val = &*(args++);
122  val->setName("val");
123  Value * const shftAmount = &*(args++);
124  shftAmount->setName("shftAmount");
125  Value * const blockCarry = &*(args++);
126  blockCarry->setName("blockCarry");
127
128  SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mLongAdvanceFunc,0));
129
130  Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
131  CreateStore(blockCarry, firstCarryPtr);
132
133  Value * adv0 = CreateShl(val, shftAmount);
134  Value * nextid = CreateAdd(id, getInt32(1));
135  Value * carryNextPtr = CreateGEP(carry, {getInt32(0), nextid});
136  Value * lshr0 = CreateLShr(val, CreateSub(CreateBitCast(getInt64(64), mBitBlockType), shftAmount));
137  CreateStore(lshr0, carryNextPtr);
138
139  CreateCall(barrierFunc);
140
141  Value * lastCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(groupThreads)});
142  Value * blockCarryOut = CreateLoad(lastCarryPtr, "blockCarryOut");
143
144  Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
145  Value * carryVal = CreateLoad(carryPtr, "carryVal");
146  Value * adv1 = CreateOr(adv0, carryVal);
147
148 
149  Value * retVal = UndefValue::get(returnType);
150  retVal = CreateInsertValue(retVal, adv1, 0);
151  retVal = CreateInsertValue(retVal, blockCarryOut, 1);
152  CreateRet(retVal);
153
154}
155
156                                           
157                                           
158void IDISA_NVPTX20_Builder::CreateLongAddFunc(){
159  Type * const int64ty = getInt64Ty();
160  Type * const int32ty = getInt32Ty();
161  Type * returnType = StructType::get(mMod->getContext(), {mBitBlockType, mBitBlockType});
162
163  mLongAddFunc = cast<Function>(mMod->getOrInsertFunction("LongAdd", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
164  mLongAddFunc->setCallingConv(CallingConv::C);
165  Function::arg_iterator args = mLongAddFunc->arg_begin();
166
167  Value * const id = &*(args++);
168  id->setName("id");
169  Value * const valA = &*(args++);
170  valA->setName("valA");
171  Value * const valB = &*(args++);
172  valB->setName("valB");
173  Value * const blockCarry = &*(args++);
174  blockCarry->setName("blockCarry");
175
176  BasicBlock * entryBlock = BasicBlock::Create(mMod->getContext(), "entry", mLongAddFunc, 0);
177  BasicBlock * bubbleCalculateBlock = BasicBlock::Create(mMod->getContext(), "bubbleCalculate", mLongAddFunc, 0);
178  BasicBlock * bubbleSetBlock = BasicBlock::Create(mMod->getContext(), "bubbleSet", mLongAddFunc, 0);
179
180  SetInsertPoint(entryBlock);
181
182  Value * id64 = CreateZExt(id, int64ty);
183
184  Value * partial_sum = CreateAdd(valA, valB);
185  Value * gen = CreateAnd(valA, valB);
186  Value * prop = CreateXor(valA, valB);
187
188  Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
189  Value * carryInitVal = CreateAnd(CreateOr(gen, CreateAnd(prop, CreateNot(partial_sum))), CreateBitCast(getInt64(0x8000000000000000), mBitBlockType));
190  carryInitVal = CreateLShr(carryInitVal, CreateBitCast(CreateSub(getInt64(63), id64), mBitBlockType));
191  CreateStore(carryInitVal, carryPtr);
192
193  Value * bubbleCond = CreateICmpEQ(CreateAdd(CreateBitCast(partial_sum, int64ty), getInt64(1)), getInt64(0));
194  CreateCondBr(bubbleCond, bubbleCalculateBlock, bubbleSetBlock);
195
196  SetInsertPoint(bubbleCalculateBlock);
197  Value * calcBubble = CreateBitCast(CreateShl(getInt64(1), id64), mBitBlockType);
198  CreateBr(bubbleSetBlock);
199
200  SetInsertPoint(bubbleSetBlock);
201  PHINode * bubbleInitVal = CreatePHI(mBitBlockType, 2, "bubbleInitVal");
202  bubbleInitVal->addIncoming(CreateBitCast(getInt64(0), mBitBlockType), entryBlock);
203  bubbleInitVal->addIncoming(calcBubble, bubbleCalculateBlock);
204
205  Value * bubblePtr = CreateGEP(bubble, {getInt32(0), id});
206  CreateStore(bubbleInitVal, bubblePtr);
207
208  CreateCall(barrierFunc);
209
210  Value * carryOffsetPtr = nullptr;
211  Value * carryVal = carryInitVal;
212  Value * bubbleOffsetPtr = nullptr;
213  Value * bubbleVal = bubbleInitVal;
214
215  for (int offset=groupThreads/2; offset>0; offset=offset>>1){
216    carryOffsetPtr = CreateGEP(carry, {getInt32(0), CreateXor(id, getInt32(offset))});
217    carryVal = CreateOr(carryVal, CreateLoad(carryOffsetPtr));
218    CreateStore(carryVal, carryPtr);
219    bubbleOffsetPtr = CreateGEP(bubble, {getInt32(0), CreateXor(id, getInt32(offset))});
220    bubbleVal = CreateOr(bubbleVal, CreateLoad(bubbleOffsetPtr));
221    CreateStore(bubbleVal, bubblePtr);
222    CreateCall(barrierFunc);
223  }
224
225  Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
226  Value * carryVal0 = CreateLoad(firstCarryPtr, "carry0");
227  Value * carry_mask = CreateOr(CreateShl(carryVal0, 1), blockCarry);
228  Value * firstBubblePtr = CreateGEP(bubble, {getInt32(0), getInt32(0)});
229  Value * bubble_mask = CreateLoad(firstBubblePtr, "bubble_mask");
230
231  Value * s = CreateAnd(CreateAdd(carry_mask, bubble_mask), CreateNot(bubble_mask));
232  Value * inc = CreateOr(s, CreateSub(s, carry_mask));
233  Value * rslt = CreateAdd(partial_sum, CreateAnd(CreateLShr(inc, CreateBitCast(id64, mBitBlockType)), CreateBitCast(getInt64(1), mBitBlockType)));
234
235  Value * blockCarryOut = CreateLShr(CreateOr(carryVal0, CreateAnd(bubble_mask, inc)), 63);
236
237  Value * retVal = UndefValue::get(returnType);
238  retVal = CreateInsertValue(retVal, rslt, 0);
239  retVal = CreateInsertValue(retVal, blockCarryOut, 1);
240  CreateRet(retVal);
241
242}
243
244void IDISA_NVPTX20_Builder::CreateBallotFunc(){
245    Type * const int32ty = getInt32Ty();
246    Type * const int1ty = getInt1Ty();
247    Function * const ballotFn = cast<Function>(mMod->getOrInsertFunction("ballot_nvptx", int32ty, int1ty, nullptr));
248    ballotFn->setCallingConv(CallingConv::C);
249    Function::arg_iterator args = ballotFn->arg_begin();
250
251    Value * const input = &*(args++);
252    input->setName("input");
253
254    SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", ballotFn, 0));
255
256    Value * conv = CreateZExt(input, int32ty);
257
258    const char * AsmStream = "{.reg .pred %p1;"
259                             "setp.ne.u32 %p1, $1, 0;"
260                             "vote.ballot.b32  $0, %p1;}";
261    FunctionType * AsmFnTy = FunctionType::get(int32ty, int32ty, false);
262    llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFnTy, AsmStream, "=r,r", true, false);
263    llvm::CallInst * result = CreateCall(IA, conv);
264    result->addAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::NoUnwind);
265
266    CreateRet(result);
267}
268
269LoadInst * IDISA_NVPTX20_Builder::CreateAtomicLoadAcquire(Value * ptr) {
270    return CreateLoad(ptr);
271   
272}
273StoreInst * IDISA_NVPTX20_Builder::CreateAtomicStoreRelease(Value * val, Value * ptr) {
274    return CreateStore(val, ptr);
275}
276
277   
278}
Note: See TracBrowser for help on using the repository browser.