source: icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp @ 5486

Last change on this file since 5486 was 5486, checked in by nmedfort, 22 months ago

Initial attempt to improve debugging capabilities with compilation stack traces on error.

File size: 12.4 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "idisa_nvptx_builder.h"
8#include <llvm/IR/InlineAsm.h>
9#include <llvm/IR/Module.h>
10
11using namespace llvm;
12
13namespace IDISA {
14   
15std::string IDISA_NVPTX20_Builder::getBuilderUniqueName() { return "NVPTX20_" + std::to_string(groupThreads);}
16
17unsigned IDISA_NVPTX20_Builder::getGroupThreads() const{
18    return groupThreads;
19}
20
21Value * IDISA_NVPTX20_Builder::bitblock_any(Value * val) {
22    Type * const int32ty = getInt32Ty();
23    Function * barrierOrFunc = cast<Function>(getModule()->getOrInsertFunction("llvm.nvvm.barrier0.or", int32ty, int32ty, nullptr));
24    Value * nonZero_i1 = CreateICmpUGT(val, ConstantInt::getNullValue(mBitBlockType));
25    Value * nonZero_i32 = CreateZExt(CreateBitCast(nonZero_i1, getInt1Ty()), int32ty);
26    Value * anyNonZero = CreateCall(barrierOrFunc, nonZero_i32);
27    return CreateICmpNE(anyNonZero,  ConstantInt::getNullValue(int32ty));
28}
29
30Value * IDISA_NVPTX20_Builder::bitblock_mask_from(Value * pos){
31    Type * const int64ty = getInt64Ty();
32    Value * id = CreateCall(tidFunc);
33    Value * id64 = CreateZExt(id, int64ty);
34    Value * threadSize = getInt64(groupThreads);
35    Value * fullBlocks = CreateUDiv(pos, threadSize);
36    Value * finalBlockSelect = CreateSExt(CreateICmpEQ(id64, fullBlocks), int64ty);
37    Value * finalBlockMask = CreateShl(getInt64(-1), CreateURem(pos, threadSize));
38    Value * unusedBlockMask = CreateSExt(CreateICmpUGT(id64, fullBlocks), int64ty);
39    return CreateBitCast(CreateOr(CreateAnd(finalBlockMask, finalBlockSelect), unusedBlockMask), mBitBlockType);
40}
41
42Value * IDISA_NVPTX20_Builder::bitblock_set_bit(Value * pos){
43    Type * const int64ty = getInt64Ty();
44    Value * id = CreateCall(tidFunc);
45    Value * id64 = CreateZExt(id, int64ty);
46    Value * threadSize = getInt64(groupThreads);
47    Value * fullBlocks = CreateUDiv(pos, threadSize);
48    Value * finalBlockSelect = CreateSExt(CreateICmpEQ(id64, fullBlocks), int64ty);
49    Value * finalBlockMask = CreateShl(getInt64(1), CreateURem(pos, threadSize));
50    return CreateBitCast(CreateAnd(finalBlockMask, finalBlockSelect), mBitBlockType);
51}
52   
53std::pair<Value *, Value *> IDISA_NVPTX20_Builder::bitblock_advance(Value * a, Value * shiftin, unsigned shift) {
54    Value * id = CreateCall(tidFunc);
55    Value * retVal = CreateCall(mLongAdvanceFunc, {id, a, CreateBitCast(getInt64(shift), mBitBlockType), shiftin});
56    Value * shifted = CreateExtractValue(retVal, {0});
57    Value * shiftOut = CreateExtractValue(retVal, {1});
58    return std::pair<Value *, Value *>(shiftOut, shifted);
59}
60
61std::pair<Value *, Value *> IDISA_NVPTX20_Builder::bitblock_add_with_carry(Value * a, Value * b, Value * carryIn) {
62    Value * id = CreateCall(tidFunc);
63    Value * retVal = CreateCall(mLongAddFunc, {id, a, b, carryIn});
64    Value * sum = CreateExtractValue(retVal, {0});
65    Value * carry_out_strm = CreateExtractValue(retVal, {1});
66    return std::pair<Value *, Value *>(carry_out_strm, sum);
67}
68
69void IDISA_NVPTX20_Builder::CreateGlobals(){
70    Module * const m = getModule();
71    Type * const carryTy = ArrayType::get(mBitBlockType, groupThreads+1);
72    carry = new GlobalVariable(*m,
73        /*Type=*/carryTy,
74        /*isConstant=*/false,
75        /*Linkage=*/GlobalValue::InternalLinkage,
76        /*Initializer=*/0, 
77        /*Name=*/"carry",
78        /*InsertBefore*/nullptr,
79        /*TLMode */GlobalValue::NotThreadLocal,
80        /*AddressSpace*/ 3,
81        /*isExternallyInitialized*/false);
82
83    Type * const bubbleTy = ArrayType::get(mBitBlockType, groupThreads);
84
85    bubble = new GlobalVariable(*m,
86        /*Type=*/bubbleTy,
87        /*isConstant=*/false,
88        /*Linkage=*/GlobalValue::InternalLinkage,
89        /*Initializer=*/0, 
90        /*Name=*/"bubble",
91        /*InsertBefore*/nullptr,
92        /*TLMode */GlobalValue::NotThreadLocal,
93        /*AddressSpace*/ 3,
94        /*isExternallyInitialized*/false);
95   
96    ConstantAggregateZero* carryConstArray = ConstantAggregateZero::get(carryTy);
97    carry->setInitializer(carryConstArray);
98    ConstantAggregateZero* bubbleConstAray = ConstantAggregateZero::get(bubbleTy);
99    bubble->setInitializer(bubbleConstAray);
100
101}
102
103void IDISA_NVPTX20_Builder::CreateBuiltinFunctions(){
104    Type * const voidTy = getVoidTy();
105    Type * const int32ty = getInt32Ty();
106    Module * const m = getModule();
107    barrierFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.barrier0", voidTy, nullptr));
108    tidFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.tid.x", int32ty, nullptr));
109}
110
111void IDISA_NVPTX20_Builder::CreateLongAdvanceFunc(){
112    Type * const int32ty = getInt32Ty();
113    Module * const m = getModule();
114    Type * returnType = StructType::get(m->getContext(), {mBitBlockType, mBitBlockType});
115    mLongAdvanceFunc = cast<Function>(m->getOrInsertFunction("LongAdvance", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
116    mLongAdvanceFunc->setCallingConv(CallingConv::C);
117    auto args = mLongAdvanceFunc->arg_begin();
118
119    Value * const id = &*(args++);
120    id->setName("id");
121    Value * const val = &*(args++);
122    val->setName("val");
123    Value * const shftAmount = &*(args++);
124    shftAmount->setName("shftAmount");
125    Value * const blockCarry = &*(args++);
126    blockCarry->setName("blockCarry");
127
128    SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", mLongAdvanceFunc,0));
129
130    Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
131    CreateStore(blockCarry, firstCarryPtr);
132
133    Value * adv0 = CreateShl(val, shftAmount);
134    Value * nextid = CreateAdd(id, getInt32(1));
135    Value * carryNextPtr = CreateGEP(carry, {getInt32(0), nextid});
136    Value * lshr0 = CreateLShr(val, CreateSub(CreateBitCast(getInt64(64), mBitBlockType), shftAmount));
137    CreateStore(lshr0, carryNextPtr);
138
139    CreateCall(barrierFunc);
140
141    Value * lastCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(groupThreads)});
142    Value * blockCarryOut = CreateLoad(lastCarryPtr, "blockCarryOut");
143
144    Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
145    Value * carryVal = CreateLoad(carryPtr, "carryVal");
146    Value * adv1 = CreateOr(adv0, carryVal);
147
148
149    Value * retVal = UndefValue::get(returnType);
150    retVal = CreateInsertValue(retVal, adv1, 0);
151    retVal = CreateInsertValue(retVal, blockCarryOut, 1);
152    CreateRet(retVal);
153
154}
155
156                                           
157                                           
158void IDISA_NVPTX20_Builder::CreateLongAddFunc(){
159  Type * const int64ty = getInt64Ty();
160  Type * const int32ty = getInt32Ty();
161  Module * const m = getModule();
162
163  Type * returnType = StructType::get(m->getContext(), {mBitBlockType, mBitBlockType});
164
165  mLongAddFunc = cast<Function>(m->getOrInsertFunction("LongAdd", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
166  mLongAddFunc->setCallingConv(CallingConv::C);
167  Function::arg_iterator args = mLongAddFunc->arg_begin();
168
169  Value * const id = &*(args++);
170  id->setName("id");
171  Value * const valA = &*(args++);
172  valA->setName("valA");
173  Value * const valB = &*(args++);
174  valB->setName("valB");
175  Value * const blockCarry = &*(args++);
176  blockCarry->setName("blockCarry");
177
178  BasicBlock * entryBlock = BasicBlock::Create(m->getContext(), "entry", mLongAddFunc, 0);
179  BasicBlock * bubbleCalculateBlock = BasicBlock::Create(m->getContext(), "bubbleCalculate", mLongAddFunc, 0);
180  BasicBlock * bubbleSetBlock = BasicBlock::Create(m->getContext(), "bubbleSet", mLongAddFunc, 0);
181
182  SetInsertPoint(entryBlock);
183
184  Value * id64 = CreateZExt(id, int64ty);
185
186  Value * partial_sum = CreateAdd(valA, valB);
187  Value * gen = CreateAnd(valA, valB);
188  Value * prop = CreateXor(valA, valB);
189
190  Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
191  Value * carryInitVal = CreateAnd(CreateOr(gen, CreateAnd(prop, CreateNot(partial_sum))), CreateBitCast(getInt64(0x8000000000000000), mBitBlockType));
192  carryInitVal = CreateLShr(carryInitVal, CreateBitCast(CreateSub(getInt64(63), id64), mBitBlockType));
193  CreateStore(carryInitVal, carryPtr);
194
195  Value * bubbleCond = CreateICmpEQ(CreateAdd(CreateBitCast(partial_sum, int64ty), getInt64(1)), getInt64(0));
196  CreateCondBr(bubbleCond, bubbleCalculateBlock, bubbleSetBlock);
197
198  SetInsertPoint(bubbleCalculateBlock);
199  Value * calcBubble = CreateBitCast(CreateShl(getInt64(1), id64), mBitBlockType);
200  CreateBr(bubbleSetBlock);
201
202  SetInsertPoint(bubbleSetBlock);
203  PHINode * bubbleInitVal = CreatePHI(mBitBlockType, 2, "bubbleInitVal");
204  bubbleInitVal->addIncoming(CreateBitCast(getInt64(0), mBitBlockType), entryBlock);
205  bubbleInitVal->addIncoming(calcBubble, bubbleCalculateBlock);
206
207  Value * bubblePtr = CreateGEP(bubble, {getInt32(0), id});
208  CreateStore(bubbleInitVal, bubblePtr);
209
210  CreateCall(barrierFunc);
211
212  Value * carryOffsetPtr = nullptr;
213  Value * carryVal = carryInitVal;
214  Value * bubbleOffsetPtr = nullptr;
215  Value * bubbleVal = bubbleInitVal;
216
217  for (unsigned offset = groupThreads/2; offset>0; offset=offset>>1){
218    carryOffsetPtr = CreateGEP(carry, {getInt32(0), CreateXor(id, getInt32(offset))});
219    carryVal = CreateOr(carryVal, CreateLoad(carryOffsetPtr));
220    CreateStore(carryVal, carryPtr);
221    bubbleOffsetPtr = CreateGEP(bubble, {getInt32(0), CreateXor(id, getInt32(offset))});
222    bubbleVal = CreateOr(bubbleVal, CreateLoad(bubbleOffsetPtr));
223    CreateStore(bubbleVal, bubblePtr);
224    CreateCall(barrierFunc);
225  }
226
227  Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
228  Value * carryVal0 = CreateLoad(firstCarryPtr, "carry0");
229  Value * carry_mask = CreateOr(CreateShl(carryVal0, 1), blockCarry);
230  Value * firstBubblePtr = CreateGEP(bubble, {getInt32(0), getInt32(0)});
231  Value * bubble_mask = CreateLoad(firstBubblePtr, "bubble_mask");
232
233  Value * s = CreateAnd(CreateAdd(carry_mask, bubble_mask), CreateNot(bubble_mask));
234  Value * inc = CreateOr(s, CreateSub(s, carry_mask));
235  Value * rslt = CreateAdd(partial_sum, CreateAnd(CreateLShr(inc, CreateBitCast(id64, mBitBlockType)), CreateBitCast(getInt64(1), mBitBlockType)));
236
237  Value * blockCarryOut = CreateLShr(CreateOr(carryVal0, CreateAnd(bubble_mask, inc)), 63);
238
239  Value * retVal = UndefValue::get(returnType);
240  retVal = CreateInsertValue(retVal, rslt, 0);
241  retVal = CreateInsertValue(retVal, blockCarryOut, 1);
242  CreateRet(retVal);
243
244}
245
246void IDISA_NVPTX20_Builder::CreateBallotFunc(){
247    Type * const int32ty = getInt32Ty();
248    Type * const int1ty = getInt1Ty();
249    Module * const m = getModule();
250    Function * const ballotFn = cast<Function>(m->getOrInsertFunction("ballot_nvptx", int32ty, int1ty, nullptr));
251    ballotFn->setCallingConv(CallingConv::C);
252    Function::arg_iterator args = ballotFn->arg_begin();
253
254    Value * const input = &*(args++);
255    input->setName("input");
256
257    SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", ballotFn, 0));
258
259    Value * conv = CreateZExt(input, int32ty);
260
261    const char * AsmStream = "{.reg .pred %p1;"
262                             "setp.ne.u32 %p1, $1, 0;"
263                             "vote.ballot.b32  $0, %p1;}";
264    FunctionType * AsmFnTy = FunctionType::get(int32ty, int32ty, false);
265    InlineAsm *IA = InlineAsm::get(AsmFnTy, AsmStream, "=r,r", true, false);
266    CallInst * result = CreateCall(IA, conv);
267    result->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
268
269    CreateRet(result);
270}
271
272LoadInst * IDISA_NVPTX20_Builder::CreateAtomicLoadAcquire(Value * ptr) {
273    return CreateLoad(ptr);   
274}
275
276StoreInst * IDISA_NVPTX20_Builder::CreateAtomicStoreRelease(Value * val, Value * ptr) {
277    return CreateStore(val, ptr);
278}
279
280void IDISA_NVPTX20_Builder::CreateBaseFunctions() {
281    CreateGlobals();
282    CreateBuiltinFunctions();
283    CreateLongAdvanceFunc();
284    CreateLongAddFunc();
285    CreateBallotFunc();
286}
287
#ifdef HAS_ADDRESS_SANITIZER
// When HAS_ADDRESS_SANITIZER is defined, this builder supplies its own
// CreateLoad / CreateStore overloads, each of which forwards unchanged to
// the plain IRBuilder<> implementation.
// NOTE(review): presumably this bypasses instrumented overloads declared in
// a base class - confirm against the class header.

// Load with a C-string name.
LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, const char * Name) {
    LoadInst * const load = IRBuilder<>::CreateLoad(Ptr, Name);
    return load;
}

// Load with a Twine name.
LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, const Twine & Name) {
    LoadInst * const load = IRBuilder<>::CreateLoad(Ptr, Name);
    return load;
}

// Load with an explicit result type.
LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Type * Ty, Value * Ptr, const Twine & Name) {
    LoadInst * const load = IRBuilder<>::CreateLoad(Ty, Ptr, Name);
    return load;
}

// Load with an explicit volatility flag.
LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, bool isVolatile, const Twine & Name) {
    LoadInst * const load = IRBuilder<>::CreateLoad(Ptr, isVolatile, Name);
    return load;
}

// Store with an explicit volatility flag.
StoreInst * IDISA_NVPTX20_Builder::CreateStore(Value * Val, Value * Ptr, bool isVolatile) {
    StoreInst * const store = IRBuilder<>::CreateStore(Val, Ptr, isVolatile);
    return store;
}
#endif
309
310}
Note: See TracBrowser for help on using the repository browser.