source: icGREP/icgrep-devel/icgrep/editd/editd_gpu_kernel.cpp @ 5260

Last change on this file since 5260 was 5260, checked in by nmedfort, 2 years ago

Changes working towards simplifying access to stream elements, plus some modifications to simplify includes / forward declarations within the CodeGen library.

File size: 7.0 KB
RevLine 
[5212]1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5#include "editd_gpu_kernel.h"
6#include <kernels/kernel.h>
[5238]7#include <IR_Gen/idisa_builder.h>
[5260]8#include <llvm/IR/Module.h>
[5212]9#include <llvm/Support/raw_ostream.h>
10#include <iostream>
11
12using namespace llvm;
13
[5260]14namespace kernel {
[5212]15
[5214]16void bitblock_advance_ci_co(IDISA::IDISA_Builder * iBuilder, Value * val, unsigned shift, Value * stideCarryArr, unsigned carryIdx, std::vector<std::vector<Value *>> & adv, std::vector<std::vector<int>> & calculated, int i, int j){   
17    if(!calculated[i][j]){
18        Value * ptr = iBuilder->CreateGEP(stideCarryArr, {iBuilder->getInt32(0), iBuilder->getInt32(carryIdx)});
19        Value * ci = iBuilder->CreateLoad(ptr);
20        std::pair<Value *, Value *> rslt = iBuilder->bitblock_advance(val, ci, shift);
21        iBuilder->CreateStore(std::get<0>(rslt), ptr);
22        adv[i][j] = std::get<1>(rslt);
23        calculated[i][j] = 1;
24    }
25    return;
[5212]26}
27
[5246]28void editdGPUKernel::generateFinalBlockMethod() const {
[5212]29    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
30    Module * m = iBuilder->getModule();
31    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
32    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
33    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
34    // Final Block arguments: self, remaining, then the standard DoBlock args.
35    Function::arg_iterator args = finalBlockFunction->arg_begin();
36    Value * self = &*(args++);
37    Value * remaining = &*(args++);
38    std::vector<Value *> doBlockArgs = {self};
39    while (args != finalBlockFunction->arg_end()){
40        doBlockArgs.push_back(&*args++);
41    }
42    setScalarField(self, "EOFmask", iBuilder->bitblock_mask_from(remaining));
43    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
44    /* Adjust the produced item count */
[5247]45    Value * produced = getProducedItemCount(self, "ResultStream");
[5246]46    produced = iBuilder->CreateSub(produced, iBuilder->getSize(iBuilder->getStride()));
[5247]47    setProducedItemCount(self, "ResultStream", iBuilder->CreateAdd(produced, remaining));
[5212]48    iBuilder->CreateRetVoid();
49    iBuilder->restoreIP(savePoint);
50}
51   
[5246]52void editdGPUKernel::generateDoBlockMethod() const {
[5212]53    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
54    Module * m = iBuilder->getModule(); 
55
56    Type * const int32ty = iBuilder->getInt32Ty();
57    Type * const int8ty = iBuilder->getInt8Ty();
58
59    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
60       
61    BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0);
62   
63    iBuilder->SetInsertPoint(entryBlock);
64
65    Value * kernelStuctParam = getParameter(doBlockFunction, "self");
66    Value * pattBuf = getScalarField(kernelStuctParam, "pattStream");
67    Value * stideCarryArr = getScalarField(kernelStuctParam, "srideCarry");
68    Value * blockNo = getScalarField(kernelStuctParam, blockNoScalar);
69    Value * pattLen = ConstantInt::get(int32ty, mPatternLen+1);
70    Value * pattPos = ConstantInt::get(int32ty, 0);
71   
72    unsigned carryIdx = 0;
73
[5260]74    std::vector<std::vector<Value *>> e(mPatternLen, std::vector<Value *>(mEditDistance + 1));
75    std::vector<std::vector<Value *>> adv(mPatternLen, std::vector<Value *>(mEditDistance + 1));
76    std::vector<std::vector<int>> calculated(mPatternLen, std::vector<int>(mEditDistance + 1, 0));
77
[5212]78    Function * bidFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32ty, nullptr));
79    Value * bid = iBuilder->CreateCall(bidFunc);
[5240]80    Value * pattStartPtr = iBuilder->CreateGEP(pattBuf, iBuilder->CreateMul(pattLen, bid));
81    Value * pattPtr = iBuilder->CreateGEP(pattStartPtr, pattPos);
[5212]82    Value * pattCh = iBuilder->CreateLoad(pattPtr);
83    Value * pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
[5260]84    Value * pattStreamPtr = getStream(kernelStuctParam, "CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
[5212]85    Value * pattStream = iBuilder->CreateLoad(pattStreamPtr);
86    pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
87
88    e[0][0] = pattStream;
89    for(unsigned j = 1; j <= mEditDistance; j++){
90      e[0][j] = iBuilder->allOnes();
91    }
92
93    for(unsigned i = 1; i<mPatternLen; i++){     
[5240]94        pattPtr = iBuilder->CreateGEP(pattStartPtr, pattPos);
[5212]95        pattCh = iBuilder->CreateLoad(pattPtr);
96        pattIdx = iBuilder->CreateAnd(iBuilder->CreateLShr(pattCh, 1), ConstantInt::get(int8ty, 3));
[5260]97        pattStreamPtr = getStream(kernelStuctParam, "CCStream", blockNo, iBuilder->CreateZExt(pattIdx, int32ty));
[5212]98        pattStream = iBuilder->CreateLoad(pattStreamPtr);
99
[5214]100        bitblock_advance_ci_co(iBuilder, e[i-1][0], 1, stideCarryArr, carryIdx++, adv, calculated, i-1, 0);
101        e[i][0] = iBuilder->CreateAnd(adv[i-1][0], pattStream); 
[5212]102        for(unsigned j = 1; j<= mEditDistance; j++){
[5214]103            bitblock_advance_ci_co(iBuilder, e[i-1][j], 1, stideCarryArr, carryIdx++, adv, calculated, i-1, j);
104            bitblock_advance_ci_co(iBuilder, e[i-1][j-1], 1, stideCarryArr, carryIdx++, adv, calculated, i-1, j-1);
105            bitblock_advance_ci_co(iBuilder, e[i][j-1], 1, stideCarryArr, carryIdx++, adv, calculated, i, j-1);
106            Value * tmp1 = iBuilder->CreateAnd(adv[i-1][j], pattStream);
107            Value * tmp2 = iBuilder->CreateAnd(adv[i-1][j-1], iBuilder->CreateNot(pattStream));
108            Value * tmp3 = iBuilder->CreateOr(adv[i][j-1], e[i-1][j-1]);
[5212]109            e[i][j] = iBuilder->CreateOr(iBuilder->CreateOr(tmp1, tmp2), tmp3);
110
111        }
112        pattPos = iBuilder->CreateAdd(pattPos, ConstantInt::get(int32ty, 1));
113    }
114
[5260]115    Value * ptr = getStream(kernelStuctParam, "ResultStream", blockNo, iBuilder->getInt32(0));
[5212]116    iBuilder->CreateStore(e[mPatternLen-1][0], ptr);
117    for(unsigned j = 1; j<= mEditDistance; j++){
[5260]118        ptr = getStream(kernelStuctParam, "ResultStream", blockNo, iBuilder->getInt32(j));
119        iBuilder->CreateStore(iBuilder->CreateAnd(e[mPatternLen - 1][j], iBuilder->CreateNot(e[mPatternLen - 1][j - 1])), ptr);
[5212]120    }
121
[5247]122    Value * produced = getProducedItemCount(kernelStuctParam, "ResultStream");
[5246]123    produced = iBuilder->CreateAdd(produced, iBuilder->getSize(iBuilder->getStride()));
[5247]124    setProducedItemCount(kernelStuctParam, "ResultStream", produced); 
[5212]125       
126    iBuilder->CreateRetVoid();
127    iBuilder->restoreIP(savePoint);
128}
129
[5260]130editdGPUKernel::editdGPUKernel(IDISA::IDISA_Builder * b, unsigned dist, unsigned pattLen) :
131KernelBuilder(b, "editd_gpu",
132              {Binding{b->getStreamSetTy(4), "CCStream"}},
133              {Binding{b->getStreamSetTy(dist + 1), "ResultStream"}},
134              {Binding{PointerType::get(b->getInt8Ty(), 1), "pattStream"},
135              Binding{PointerType::get(ArrayType::get(b->getBitBlockType(), pattLen * (dist + 1) * 4), 0), "srideCarry"}},
136              {},
137              {Binding{b->getBitBlockType(), "EOFmask"}}),
138mEditDistance(dist),
139mPatternLen(pattLen) {
140
[5212]141}
142
[5260]143}
[5212]144
[5260]145
Note: See TracBrowser for help on using the repository browser.