source: icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp @ 5732

Last change on this file since 5732 was 5630, checked in by nmedfort, 22 months ago

Partial check-in for avoidance of compiling Pablo/LLVM code to determine the Kernel struct type when using a cached object. Inactive RE alternation minimization check in.

File size: 12.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "idisa_nvptx_builder.h"
8#include <llvm/IR/InlineAsm.h>
9#include <llvm/IR/Module.h>
10
11using namespace llvm;
12
13namespace IDISA {
14   
15std::string IDISA_NVPTX20_Builder::getBuilderUniqueName() { return "NVPTX20_" + std::to_string(groupThreads);}
16
17unsigned IDISA_NVPTX20_Builder::getGroupThreads() const{
18    return groupThreads;
19}
20
21Value * IDISA_NVPTX20_Builder::bitblock_any(Value * val) {
22    Type * const int32ty = getInt32Ty();
23    Function * barrierOrFunc = cast<Function>(getModule()->getOrInsertFunction("llvm.nvvm.barrier0.or", int32ty, int32ty, nullptr));
24    Value * nonZero_i1 = CreateICmpUGT(val, ConstantInt::getNullValue(mBitBlockType));
25    Value * nonZero_i32 = CreateZExt(CreateBitCast(nonZero_i1, getInt1Ty()), int32ty);
26    Value * anyNonZero = CreateCall(barrierOrFunc, nonZero_i32);
27    return CreateICmpNE(anyNonZero,  ConstantInt::getNullValue(int32ty));
28}
29
30Value * IDISA_NVPTX20_Builder::bitblock_mask_from(Value * pos){
31    Type * const int64ty = getInt64Ty();
32    Value * id = CreateCall(tidFunc);
33    Value * id64 = CreateZExt(id, int64ty);
34    Value * threadSize = getInt64(groupThreads);
35    Value * fullBlocks = CreateUDiv(pos, threadSize);
36    Value * finalBlockSelect = CreateSExt(CreateICmpEQ(id64, fullBlocks), int64ty);
37    Value * finalBlockMask = CreateShl(getInt64(-1), CreateURem(pos, threadSize));
38    Value * unusedBlockMask = CreateSExt(CreateICmpUGT(id64, fullBlocks), int64ty);
39    return CreateBitCast(CreateOr(CreateAnd(finalBlockMask, finalBlockSelect), unusedBlockMask), mBitBlockType);
40}
41
42Value * IDISA_NVPTX20_Builder::bitblock_set_bit(Value * pos){
43    Type * const int64ty = getInt64Ty();
44    Value * id = CreateCall(tidFunc);
45    Value * id64 = CreateZExt(id, int64ty);
46    Value * threadSize = getInt64(groupThreads);
47    Value * fullBlocks = CreateUDiv(pos, threadSize);
48    Value * finalBlockSelect = CreateSExt(CreateICmpEQ(id64, fullBlocks), int64ty);
49    Value * finalBlockMask = CreateShl(getInt64(1), CreateURem(pos, threadSize));
50    return CreateBitCast(CreateAnd(finalBlockMask, finalBlockSelect), mBitBlockType);
51}
52   
53std::pair<Value *, Value *> IDISA_NVPTX20_Builder::bitblock_advance(Value * a, Value * shiftin, unsigned shift) {
54    Value * id = CreateCall(tidFunc);
55    Value * retVal = CreateCall(mLongAdvanceFunc, {id, a, CreateBitCast(getInt64(shift), mBitBlockType), shiftin});
56    Value * shifted = CreateExtractValue(retVal, {0});
57    Value * shiftOut = CreateExtractValue(retVal, {1});
58    return std::pair<Value *, Value *>(shiftOut, shifted);
59}
60
61std::pair<Value *, Value *> IDISA_NVPTX20_Builder::bitblock_add_with_carry(Value * a, Value * b, Value * carryIn) {
62    Value * id = CreateCall(tidFunc);
63    Value * retVal = CreateCall(mLongAddFunc, {id, a, b, carryIn});
64    Value * sum = CreateExtractValue(retVal, {0});
65    Value * carry_out_strm = CreateExtractValue(retVal, {1});
66    return std::pair<Value *, Value *>(carry_out_strm, sum);
67}
68
69void IDISA_NVPTX20_Builder::CreateGlobals(){
70    Module * const m = getModule();
71    Type * const carryTy = ArrayType::get(mBitBlockType, groupThreads+1);
72    carry = new GlobalVariable(*m,
73        /*Type=*/carryTy,
74        /*isConstant=*/false,
75        /*Linkage=*/GlobalValue::InternalLinkage,
76        /*Initializer=*/0, 
77        /*Name=*/"carry",
78        /*InsertBefore*/nullptr,
79        /*TLMode */GlobalValue::NotThreadLocal,
80        /*AddressSpace*/ 3,
81        /*isExternallyInitialized*/false);
82
83    Type * const bubbleTy = ArrayType::get(mBitBlockType, groupThreads);
84
85    bubble = new GlobalVariable(*m,
86        /*Type=*/bubbleTy,
87        /*isConstant=*/false,
88        /*Linkage=*/GlobalValue::InternalLinkage,
89        /*Initializer=*/0, 
90        /*Name=*/"bubble",
91        /*InsertBefore*/nullptr,
92        /*TLMode */GlobalValue::NotThreadLocal,
93        /*AddressSpace*/ 3,
94        /*isExternallyInitialized*/false);
95   
96    ConstantAggregateZero* carryConstArray = ConstantAggregateZero::get(carryTy);
97    carry->setInitializer(carryConstArray);
98    ConstantAggregateZero* bubbleConstAray = ConstantAggregateZero::get(bubbleTy);
99    bubble->setInitializer(bubbleConstAray);
100
101}
102
103void IDISA_NVPTX20_Builder::CreateBuiltinFunctions(){
104    Type * const voidTy = getVoidTy();
105    Type * const int32ty = getInt32Ty();
106    Module * const m = getModule();
107    barrierFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.barrier0", voidTy, nullptr));
108    tidFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.tid.x", int32ty, nullptr));
109}
110
111void IDISA_NVPTX20_Builder::CreateLongAdvanceFunc(){
112    Type * const int32ty = getInt32Ty();
113    Module * const m = getModule();
114    Type * returnType = StructType::get(m->getContext(), {mBitBlockType, mBitBlockType});
115    mLongAdvanceFunc = cast<Function>(m->getOrInsertFunction("LongAdvance", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
116    mLongAdvanceFunc->setCallingConv(CallingConv::C);
117    auto args = mLongAdvanceFunc->arg_begin();
118
119    Value * const id = &*(args++);
120    id->setName("id");
121    Value * const val = &*(args++);
122    val->setName("val");
123    Value * const shftAmount = &*(args++);
124    shftAmount->setName("shftAmount");
125    Value * const blockCarry = &*(args++);
126    blockCarry->setName("blockCarry");
127
128    SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", mLongAdvanceFunc,0));
129
130    Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
131    CreateStore(blockCarry, firstCarryPtr);
132
133    Value * adv0 = CreateShl(val, shftAmount);
134    Value * nextid = CreateAdd(id, getInt32(1));
135    Value * carryNextPtr = CreateGEP(carry, {getInt32(0), nextid});
136    Value * lshr0 = CreateLShr(val, CreateSub(CreateBitCast(getInt64(64), mBitBlockType), shftAmount));
137    CreateStore(lshr0, carryNextPtr);
138
139    CreateCall(barrierFunc);
140
141    Value * lastCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(groupThreads)});
142    Value * blockCarryOut = CreateLoad(lastCarryPtr, "blockCarryOut");
143
144    Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
145    Value * carryVal = CreateLoad(carryPtr, "carryVal");
146    Value * adv1 = CreateOr(adv0, carryVal);
147
148
149    Value * retVal = UndefValue::get(returnType);
150    retVal = CreateInsertValue(retVal, adv1, 0);
151    retVal = CreateInsertValue(retVal, blockCarryOut, 1);
152    CreateRet(retVal);
153
154}
155
156                                           
157                                           
158void IDISA_NVPTX20_Builder::CreateLongAddFunc(){
159  Type * const int64ty = getInt64Ty();
160  Type * const int32ty = getInt32Ty();
161  Module * const m = getModule();
162
163  Type * returnType = StructType::get(m->getContext(), {mBitBlockType, mBitBlockType});
164
165  mLongAddFunc = cast<Function>(m->getOrInsertFunction("LongAdd", returnType, int32ty, mBitBlockType, mBitBlockType, mBitBlockType, nullptr));
166  mLongAddFunc->setCallingConv(CallingConv::C);
167  Function::arg_iterator args = mLongAddFunc->arg_begin();
168
169  Value * const id = &*(args++);
170  id->setName("id");
171  Value * const valA = &*(args++);
172  valA->setName("valA");
173  Value * const valB = &*(args++);
174  valB->setName("valB");
175  Value * const blockCarry = &*(args++);
176  blockCarry->setName("blockCarry");
177
178  BasicBlock * entryBlock = BasicBlock::Create(m->getContext(), "entry", mLongAddFunc, 0);
179  BasicBlock * bubbleCalculateBlock = BasicBlock::Create(m->getContext(), "bubbleCalculate", mLongAddFunc, 0);
180  BasicBlock * bubbleSetBlock = BasicBlock::Create(m->getContext(), "bubbleSet", mLongAddFunc, 0);
181
182  SetInsertPoint(entryBlock);
183
184  Value * id64 = CreateZExt(id, int64ty);
185
186  Value * partial_sum = CreateAdd(valA, valB);
187  Value * gen = CreateAnd(valA, valB);
188  Value * prop = CreateXor(valA, valB);
189
190  Value * carryPtr = CreateGEP(carry, {getInt32(0), id});
191  Value * carryInitVal = CreateAnd(CreateOr(gen, CreateAnd(prop, CreateNot(partial_sum))), CreateBitCast(getInt64(0x8000000000000000), mBitBlockType));
192  carryInitVal = CreateLShr(carryInitVal, CreateBitCast(CreateSub(getInt64(63), id64), mBitBlockType));
193  CreateStore(carryInitVal, carryPtr);
194
195  Value * bubbleCond = CreateICmpEQ(CreateAdd(CreateBitCast(partial_sum, int64ty), getInt64(1)), getInt64(0));
196  CreateCondBr(bubbleCond, bubbleCalculateBlock, bubbleSetBlock);
197
198  SetInsertPoint(bubbleCalculateBlock);
199  Value * calcBubble = CreateBitCast(CreateShl(getInt64(1), id64), mBitBlockType);
200  CreateBr(bubbleSetBlock);
201
202  SetInsertPoint(bubbleSetBlock);
203  PHINode * bubbleInitVal = CreatePHI(mBitBlockType, 2, "bubbleInitVal");
204  bubbleInitVal->addIncoming(CreateBitCast(getInt64(0), mBitBlockType), entryBlock);
205  bubbleInitVal->addIncoming(calcBubble, bubbleCalculateBlock);
206
207  Value * bubblePtr = CreateGEP(bubble, {getInt32(0), id});
208  CreateStore(bubbleInitVal, bubblePtr);
209
210  CreateCall(barrierFunc);
211
212  Value * carryVal = carryInitVal;
213  Value * bubbleVal = bubbleInitVal;
214
215  for (unsigned offset = groupThreads/2; offset>0; offset=offset>>1){
216    Value * carryOffsetPtr = CreateGEP(carry, {getInt32(0), CreateXor(id, getInt32(offset))});
217    carryVal = CreateOr(carryVal, CreateLoad(carryOffsetPtr));
218    CreateStore(carryVal, carryPtr);
219    Value * bubbleOffsetPtr = CreateGEP(bubble, {getInt32(0), CreateXor(id, getInt32(offset))});
220    bubbleVal = CreateOr(bubbleVal, CreateLoad(bubbleOffsetPtr));
221    CreateStore(bubbleVal, bubblePtr);
222    CreateCall(barrierFunc);
223  }
224
225  Value * firstCarryPtr = CreateGEP(carry, {getInt32(0), getInt32(0)});
226  Value * carryVal0 = CreateLoad(firstCarryPtr, "carry0");
227  Value * carry_mask = CreateOr(CreateShl(carryVal0, 1), blockCarry);
228  Value * firstBubblePtr = CreateGEP(bubble, {getInt32(0), getInt32(0)});
229  Value * bubble_mask = CreateLoad(firstBubblePtr, "bubble_mask");
230
231  Value * s = CreateAnd(CreateAdd(carry_mask, bubble_mask), CreateNot(bubble_mask));
232  Value * inc = CreateOr(s, CreateSub(s, carry_mask));
233  Value * rslt = CreateAdd(partial_sum, CreateAnd(CreateLShr(inc, CreateBitCast(id64, mBitBlockType)), CreateBitCast(getInt64(1), mBitBlockType)));
234
235  Value * blockCarryOut = CreateLShr(CreateOr(carryVal0, CreateAnd(bubble_mask, inc)), 63);
236
237  Value * retVal = UndefValue::get(returnType);
238  retVal = CreateInsertValue(retVal, rslt, 0);
239  retVal = CreateInsertValue(retVal, blockCarryOut, 1);
240  CreateRet(retVal);
241
242}
243
244void IDISA_NVPTX20_Builder::CreateBallotFunc(){
245    Type * const int32ty = getInt32Ty();
246    Type * const int1ty = getInt1Ty();
247    Module * const m = getModule();
248    Function * const ballotFn = cast<Function>(m->getOrInsertFunction("ballot_nvptx", int32ty, int1ty, nullptr));
249    ballotFn->setCallingConv(CallingConv::C);
250    Function::arg_iterator args = ballotFn->arg_begin();
251
252    Value * const input = &*(args++);
253    input->setName("input");
254
255    SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", ballotFn, 0));
256
257    Value * conv = CreateZExt(input, int32ty);
258
259    const char * AsmStream = "{.reg .pred %p1;"
260                             "setp.ne.u32 %p1, $1, 0;"
261                             "vote.ballot.b32  $0, %p1;}";
262    FunctionType * AsmFnTy = FunctionType::get(int32ty, int32ty, false);
263    InlineAsm *IA = InlineAsm::get(AsmFnTy, AsmStream, "=r,r", true, false);
264    CallInst * result = CreateCall(IA, conv);
265    result->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
266
267    CreateRet(result);
268}
269
270LoadInst * IDISA_NVPTX20_Builder::CreateAtomicLoadAcquire(Value * ptr) {
271    return CreateLoad(ptr);   
272}
273
274StoreInst * IDISA_NVPTX20_Builder::CreateAtomicStoreRelease(Value * val, Value * ptr) {
275    return CreateStore(val, ptr);
276}
277
278void IDISA_NVPTX20_Builder::CreateBaseFunctions() {
279    CreateGlobals();
280    CreateBuiltinFunctions();
281    CreateLongAdvanceFunc();
282    CreateLongAddFunc();
283    CreateBallotFunc();
284}
285
286#ifdef HAS_ADDRESS_SANITIZER
287LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, const char * Name) {
288    return IRBuilder<>::CreateLoad(Ptr, Name);
289}
290
291LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, const Twine & Name) {
292    return IRBuilder<>::CreateLoad(Ptr, Name);
293}
294
295LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Type * Ty, Value * Ptr, const Twine & Name) {
296    return IRBuilder<>::CreateLoad(Ty, Ptr, Name);
297}
298
299LoadInst * IDISA_NVPTX20_Builder::CreateLoad(Value * Ptr, bool isVolatile, const Twine & Name) {
300    return IRBuilder<>::CreateLoad(Ptr, isVolatile, Name);
301}
302
303StoreInst * IDISA_NVPTX20_Builder::CreateStore(Value * Val, Value * Ptr, bool isVolatile) {
304    return IRBuilder<>::CreateStore(Val, Ptr, isVolatile);
305}
306#endif
307
308}
Note: See TracBrowser for help on using the repository browser.