source: icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp @ 5495

Last change on this file since 5495 was 5486, checked in by nmedfort, 2 years ago

Initial attempt to improve debugging capabilities with compilation stack traces on error.

File size: 12.4 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "idisa_nvptx_builder.h"
8#include <llvm/IR/InlineAsm.h>
9#include <llvm/IR/Module.h>
10
11using namespace llvm;
12
13namespace IDISA {
14   
15std::string IDISA_NVPTX20_Builder::getBuilderUniqueName() { return "NVPTX20_" + std::to_string(groupThreads);}
16
17unsigned IDISA_NVPTX20_Builder::getGroupThreads() const{
18    return groupThreads;
19}
20
21Value * IDISA_NVPTX20_Builder::bitblock_any(Value * val) {
22    Type * const int32ty = getInt32Ty();
23    Function * barrierOrFunc = cast<Function>(getModule()->getOrInsertFunction("llvm.nvvm.barrier0.or", int32ty, int32ty, nullptr));
24    Value * nonZero_i1 = CreateICmpUGT(val, ConstantInt::getNullValue(mBitBlockType));
25    Value * nonZero_i32 = CreateZExt(CreateBitCast(nonZero_i1, getInt1Ty()), int32ty);
26    Value * anyNonZero = CreateCall(barrierOrFunc, nonZero_i32);
27    return CreateICmpNE(anyNonZero,  ConstantInt::getNullValue(int32ty));
28}
29
30Value * IDISA_NVPTX20_Builder::bitblock_mask_from(Value * pos){
31    Type * const int64ty = getInt64Ty();
32    Value * id = CreateCall(tidFunc);
33    Value * id64 = CreateZExt(id, int64ty);
34    Value * threadSize = getInt64(groupThreads);
35    Value * fullBlocks = CreateUDiv(pos, threadSize);
36    Value * finalBlockSelect = CreateSExt(CreateICmpEQ(id64, fullBlocks), int64ty);
37    Value * finalBlockMask = CreateShl(getInt64(-1), CreateURem(pos, threadSize));
38    Value * unusedBlockMask = CreateSExt(CreateICmpUGT(id64, fullBlocks), int64ty);
39    return CreateBitCast(CreateOr(CreateAnd(finalBlockMask, finalBlockSelect), unusedBlockMask), mBitBlockType);
40}
41
42Value * IDISA_NVPTX20_Builder::bitblock_set_bit(Value * pos){
43    Type * const int64ty = getInt64Ty();
44    Value * id = CreateCall(tidFunc);
45    Value * id64 = CreateZExt(id, int64ty);
46    Value * threadSize = getInt64(groupThreads);
47    Value * fullBlocks = CreateUDiv(pos, threadSize);
48    Value * finalBlockSelect = CreateSExt(CreateICmpEQ(id64, fullBlocks), int64ty);
49    Value * finalBlockMask = CreateShl(getInt64(1), CreateURem(pos, threadSize));
50    return CreateBitCast(CreateAnd(finalBlockMask, finalBlockSelect), mBitBlockType);
51}
52   
53std::pair<Value *, Value *> IDISA_NVPTX20_Builder::bitblock_advance(Value * a, Value * shiftin, unsigned shift) {
54    Value * id = CreateCall(tidFunc);
55    Value * retVal = CreateCall(mLongAdvanceFunc, {id, a, CreateBitCast(getInt64(shift), mBitBlockType), shiftin});
56    Value * shifted = CreateExtractValue(retVal, {0});
57    Value * shiftOut = CreateExtractValue(retVal, {1});
58    return std::pair<Value *, Value *>(shiftOut, shifted);
59}
60
61std::pair<Value *, Value *> IDISA_NVPTX20_Builder::bitblock_add_with_carry(Value * a, Value * b, Value * carryIn) {
62    Value * id = CreateCall(tidFunc);
63    Value * retVal = CreateCall(mLongAddFunc, {id, a, b, carryIn});
64    Value * sum = CreateExtractValue(retVal, {0});
65    Value * carry_out_strm = CreateExtractValue(retVal, {1});
66    return std::pair<Value *, Value *>(carry_out_strm, sum);
67}
68
69void IDISA_NVPTX20_Builder::CreateGlobals(){
70    Module * const m = getModule();
71    Type * const carryTy = ArrayType::get(mBitBlockType, groupThreads+1);
72    carry = new GlobalVariable(*m,
73        /*Type=*/carryTy,
74        /*isConstant=*/false,
75        /*Linkage=*/GlobalValue::InternalLinkage,
76        /*Initializer=*/0, 
77        /*Name=*/"carry",
78        /*InsertBefore*/nullptr,
79        /*TLMode */GlobalValue::NotThreadLocal,
80        /*AddressSpace*/ 3,
81        /*isExternallyInitialized*/false);
82
83    Type * const bubbleTy = ArrayType::get(mBitBlockType, groupThreads);
84
85    bubble = new GlobalVariable(*m,
86        /*Type=*/bubbleTy,
87        /*isConstant=*/false,
88        /*Linkage=*/GlobalValue::InternalLinkage,
89        /*Initializer=*/0, 
90        /*Name=*/"bubble",
91        /*InsertBefore*/nullptr,
92        /*TLMode */GlobalValue::NotThreadLocal,
93        /*AddressSpace*/ 3,
94        /*isExternallyInitialized*/false);
95   
96    ConstantAggregateZero* carryConstArray = ConstantAggregateZero::get(carryTy);
97    carry->setInitializer(carryConstArray);
98    ConstantAggregateZero* bubbleConstAray = ConstantAggregateZero::get(bubbleTy);
99    bubble->setInitializer(bubbleConstAray);
100
101}
102
103void IDISA_NVPTX20_Builder::CreateBuiltinFunctions(){
104    Type * const voidTy = getVoidTy();
105    Type * const int32ty = getInt32Ty();
106    Module * const m = getModule();
107    barrierFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.barrier0", voidTy, nullptr));
108    tidFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.tid.x", int32ty, nullptr));
109}
110
111void IDISA_NVPTX20_Builder::CreateLongAdvanceFunc(){
112    Type * const int32ty = getInt32Ty();
113    Module * const m = getModule();
114    Type * returnType = StructType::get(m->getContext(), {mBitBlockType, mBitBlockType});
115    mLongAdvanceFunc = cast<Function>(m->getOrInsertFunction("LongAdvance", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
116    mLongAdvanceFunc->setCallingConv(CallingConv::C);
117    auto args = mLongAdvanceFunc->arg_begin();
118
119    Value * const id = &*(args++);
120    id->setName("id");
121    Value * const val = &*(args++);
122    val->setName("val");
123    Value * const shftAmount = &*(args++);
124    shftAmount->setName("shftAmount");
125    Value * const blockCarry = &*(args++);
126    blockCarry->setName("blockCarry");
127
128    SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", mLongAdvanceFunc,0));
129
130    Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
131    CreateStore(blockCarry, firstCarryPtr);
132
133    Value * adv0 = CreateShl(val, shftAmount);
134    Value * nextid = CreateAdd(id, getInt32(1));
135    Value * carryNextPtr = CreateGEP(carry, {getInt32(0), nextid});
136    Value * lshr0 = CreateLShr(val, CreateSub(CreateBitCast(getInt64(64), mBitBlockType), shftAmount));
137    CreateStore(lshr0, carryNextPtr);
138
139    CreateCall(barrierFunc);
140
141    Value * lastCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(groupThreads)});
142    Value * blockCarryOut = CreateLoad(lastCarryPtr, "blockCarryOut");
143
144    Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
145    Value * carryVal = CreateLoad(carryPtr, "carryVal");
146    Value * adv1 = CreateOr(adv0, carryVal);
147
148
149    Value * retVal = UndefValue::get(returnType);
150    retVal = CreateInsertValue(retVal, adv1, 0);
151    retVal = CreateInsertValue(retVal, blockCarryOut, 1);
152    CreateRet(retVal);
153
154}
155
156                                           
157                                           
158void IDISA_NVPTX20_Builder::CreateLongAddFunc(){
159  Type * const int64ty = getInt64Ty();
160  Type * const int32ty = getInt32Ty();
161  Module * const m = getModule();
162
163  Type * returnType = StructType::get(m->getContext(), {mBitBlockType, mBitBlockType});
164
165  mLongAddFunc = cast<Function>(m->getOrInsertFunction("LongAdd", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
166  mLongAddFunc->setCallingConv(CallingConv::C);
167  Function::arg_iterator args = mLongAddFunc->arg_begin();
168
169  Value * const id = &*(args++);
170  id->setName("id");
171  Value * const valA = &*(args++);
172  valA->setName("valA");
173  Value * const valB = &*(args++);
174  valB->setName("valB");
175  Value * const blockCarry = &*(args++);
176  blockCarry->setName("blockCarry");
177
178  BasicBlock * entryBlock = BasicBlock::Create(m->getContext(), "entry", mLongAddFunc, 0);
179  BasicBlock * bubbleCalculateBlock = BasicBlock::Create(m->getContext(), "bubbleCalculate", mLongAddFunc, 0);
180  BasicBlock * bubbleSetBlock = BasicBlock::Create(m->getContext(), "bubbleSet", mLongAddFunc, 0);
181
182  SetInsertPoint(entryBlock);
183
184  Value * id64 = CreateZExt(id, int64ty);
185
186  Value * partial_sum = CreateAdd(valA, valB);
187  Value * gen = CreateAnd(valA, valB);
188  Value * prop = CreateXor(valA, valB);
189
190  Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
191  Value * carryInitVal = CreateAnd(CreateOr(gen, CreateAnd(prop, CreateNot(partial_sum))), CreateBitCast(getInt64(0x8000000000000000), mBitBlockType));
192  carryInitVal = CreateLShr(carryInitVal, CreateBitCast(CreateSub(getInt64(63), id64), mBitBlockType));
193  CreateStore(carryInitVal, carryPtr);
194
195  Value * bubbleCond = CreateICmpEQ(CreateAdd(CreateBitCast(partial_sum, int64ty), getInt64(1)), getInt64(0));
196  CreateCondBr(bubbleCond, bubbleCalculateBlock, bubbleSetBlock);
197
198  SetInsertPoint(bubbleCalculateBlock);
199  Value * calcBubble = CreateBitCast(CreateShl(getInt64(1), id64), mBitBlockType);
200  CreateBr(bubbleSetBlock);
201
202  SetInsertPoint(bubbleSetBlock);
203  PHINode * bubbleInitVal = CreatePHI(mBitBlockType, 2, "bubbleInitVal");
204  bubbleInitVal->addIncoming(CreateBitCast(getInt64(0), mBitBlockType), entryBlock);
205  bubbleInitVal->addIncoming(calcBubble, bubbleCalculateBlock);
206
207  Value * bubblePtr = CreateGEP(bubble, {getInt32(0), id});
208  CreateStore(bubbleInitVal, bubblePtr);
209
210  CreateCall(barrierFunc);
211
212  Value * carryOffsetPtr = nullptr;
213  Value * carryVal = carryInitVal;
214  Value * bubbleOffsetPtr = nullptr;
215  Value * bubbleVal = bubbleInitVal;
216
217  for (unsigned offset = groupThreads/2; offset>0; offset=offset>>1){
218    carryOffsetPtr = CreateGEP(carry, {getInt32(0), CreateXor(id, getInt32(offset))});
219    carryVal = CreateOr(carryVal, CreateLoad(carryOffsetPtr));
220    CreateStore(carryVal, carryPtr);
221    bubbleOffsetPtr = CreateGEP(bubble, {getInt32(0), CreateXor(id, getInt32(offset))});
222    bubbleVal = CreateOr(bubbleVal, CreateLoad(bubbleOffsetPtr));
223    CreateStore(bubbleVal, bubblePtr);
224    CreateCall(barrierFunc);
225  }
226
227  Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
228  Value * carryVal0 = CreateLoad(firstCarryPtr, "carry0");
229  Value * carry_mask = CreateOr(CreateShl(carryVal0, 1), blockCarry);
230  Value * firstBubblePtr = CreateGEP(bubble, {getInt32(0), getInt32(0)});
231  Value * bubble_mask = CreateLoad(firstBubblePtr, "bubble_mask");
232
233  Value * s = CreateAnd(CreateAdd(carry_mask, bubble_mask), CreateNot(bubble_mask));
234  Value * inc = CreateOr(s, CreateSub(s, carry_mask));
235  Value * rslt = CreateAdd(partial_sum, CreateAnd(CreateLShr(inc, CreateBitCast(id64, mBitBlockType)), CreateBitCast(getInt64(1), mBitBlockType)));
236
237  Value * blockCarryOut = CreateLShr(CreateOr(carryVal0, CreateAnd(bubble_mask, inc)), 63);
238
239  Value * retVal = UndefValue::get(returnType);
240  retVal = CreateInsertValue(retVal, rslt, 0);
241  retVal = CreateInsertValue(retVal, blockCarryOut, 1);
242  CreateRet(retVal);
243
244}
245
246void IDISA_NVPTX20_Builder::CreateBallotFunc(){
247    Type * const int32ty = getInt32Ty();
248    Type * const int1ty = getInt1Ty();
249    Module * const m = getModule();
250    Function * const ballotFn = cast<Function>(m->getOrInsertFunction("ballot_nvptx", int32ty, int1ty, nullptr));
251    ballotFn->setCallingConv(CallingConv::C);
252    Function::arg_iterator args = ballotFn->arg_begin();
253
254    Value * const input = &*(args++);
255    input->setName("input");
256
257    SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", ballotFn, 0));
258
259    Value * conv = CreateZExt(input, int32ty);
260
261    const char * AsmStream = "{.reg .pred %p1;"
262                             "setp.ne.u32 %p1, $1, 0;"
263                             "vote.ballot.b32  $0, %p1;}";
264    FunctionType * AsmFnTy = FunctionType::get(int32ty, int32ty, false);
265    InlineAsm *IA = InlineAsm::get(AsmFnTy, AsmStream, "=r,r", true, false);
266    CallInst * result = CreateCall(IA, conv);
267    result->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
268
269    CreateRet(result);
270}
271
272LoadInst * IDISA_NVPTX20_Builder::CreateAtomicLoadAcquire(Value * ptr) {
273    return CreateLoad(ptr);   
274}
275
276StoreInst * IDISA_NVPTX20_Builder::CreateAtomicStoreRelease(Value * val, Value * ptr) {
277    return CreateStore(val, ptr);
278}
279
280void IDISA_NVPTX20_Builder::CreateBaseFunctions() {
281    CreateGlobals();
282    CreateBuiltinFunctions();
283    CreateLongAdvanceFunc();
284    CreateLongAddFunc();
285    CreateBallotFunc();
286}
287
#ifdef HAS_ADDRESS_SANITIZER
// When HAS_ADDRESS_SANITIZER is defined, the load/store emitters below forward
// straight to the plain llvm::IRBuilder<> implementations.
// NOTE(review): presumably this bypasses instrumented overrides in a parent
// builder class -- confirm against the header.

LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, const char * Name) {
    LoadInst * const load = IRBuilder<>::CreateLoad(Ptr, Name);
    return load;
}

LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, const Twine & Name) {
    LoadInst * const load = IRBuilder<>::CreateLoad(Ptr, Name);
    return load;
}

LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Type * Ty, Value * Ptr, const Twine & Name) {
    LoadInst * const load = IRBuilder<>::CreateLoad(Ty, Ptr, Name);
    return load;
}

LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, bool isVolatile, const Twine & Name) {
    LoadInst * const load = IRBuilder<>::CreateLoad(Ptr, isVolatile, Name);
    return load;
}

StoreInst * IDISA_NVPTX20_Builder::CreateStore(Value * Val, Value * Ptr, bool isVolatile) {
    StoreInst * const store = IRBuilder<>::CreateStore(Val, Ptr, isVolatile);
    return store;
}
#endif
309
310}
Note: See TracBrowser for help on using the repository browser.