Ignore:
Timestamp:
Sep 8, 2016, 3:24:31 PM (3 years ago)
Author:
lindanl
Message:

Add NVPTX Arch to the framework. Fix directory bug in make check.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5142 r5151  
    2424#include <llvm/IR/Verifier.h>
    2525#include <llvm/IR/TypeBuilder.h>
     26#include <llvm/IR/InlineAsm.h>
    2627#include <UCD/UnicodeNameData.h>
    2728
     
    6970#include <kernels/kernel.h>
    7071
     72#ifdef CUDA_ENABLED
     73#include <IDISA/CudaDriver.h>
     74#endif
     75
    7176static cl::OptionCategory bGrepOutputOptions("Output Options",
    7277                                             "These options control the output.");
     
    8489
    8590bool isUTF_16 = false;
     91std::string IRFilename = "icgrep.ll";
     92std::string PTXFilename = "icgrep.ptx";
    8693
    8794void GrepEngine::doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly, bool UTF_16) {
     
    101108            boost::iostreams::mapped_file_source source(fileName, fileSize, 0);
    102109            char * fileBuffer = const_cast<char *>(source.data());
     110#ifdef CUDA_ENABLED 
     111            if(codegen::NVPTX){
     112                ulong * rslt = RunPTX(PTXFilename, fileBuffer, fileSize, CountOnly);
     113                if (CountOnly){
     114                    exit(0);
     115                }
     116                else{
     117                    mGrepFunction_CPU((char *)rslt, fileBuffer, fileSize, fileIdx);
     118                    return;
     119                }
     120               
     121            }
     122#endif
    103123            if (CountOnly) {
    104124                total_CountOnly[fileIdx] = mGrepFunction_CountOnly(fileBuffer, fileSize, fileIdx);
     
    111131        }
    112132    } else {
     133#ifdef CUDA_ENABLED
     134        if (codegen::NVPTX){
     135            std::cout << 0 << std::endl;
     136            exit(0);
     137        }
     138#endif
    113139        if (CountOnly) {
    114140            total_CountOnly[fileIdx] = mGrepFunction_CountOnly(nullptr, 0, fileIdx);
     
    121147using namespace parabix;
    122148
    123 void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, bool isNameExpression) {
    124     isUTF_16 = UTF_16;
    125     Module * M = new Module(moduleName, getGlobalContext());
    126    
    127     IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);
    128 
    129     const unsigned segmentSize = codegen::SegmentSize;
    130     const unsigned bufferSegments = codegen::BufferSegments;
    131 
    132     unsigned encodingBits = UTF_16 ? 16 : 8;
    133 
    134     mIsNameExpression = isNameExpression;
    135 
     149void createBallotFunction(Module * m, IDISA::IDISA_Builder * iBuilder){
    136150    Type * const int32ty = iBuilder->getInt32Ty();
     151    Type * const int1ty = iBuilder->getInt1Ty();
     152    Function * const ballotFn = cast<Function>(m->getOrInsertFunction("ballot_nvptx", int32ty, int1ty, nullptr));
     153    ballotFn->setCallingConv(CallingConv::C);
     154    Function::arg_iterator args = ballotFn->arg_begin();
     155
     156    Value * const input = &*(args++);
     157    input->setName("input");
     158
     159    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", ballotFn, 0));
     160
     161    Value * conv = iBuilder->CreateZExt(input, int32ty);
     162
     163    std::ostringstream AsmStream;
     164    AsmStream << "{.reg .pred %p1; ";
     165    AsmStream << "setp.ne.u32 %p1, $1, 0; ";
     166    AsmStream << "vote.ballot.b32  $0, %p1;}";
     167    FunctionType * AsmFnTy = FunctionType::get(int32ty, int32ty, false);
     168    llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFnTy, AsmStream.str(), "=r,r", true, false);
     169    llvm::CallInst * result = iBuilder->CreateCall(IA, conv);
     170    result->addAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::NoUnwind);
     171
     172    iBuilder->CreateRet(result);
     173}
     174
     175Function * generateGPUKernel(Module * m, IDISA::IDISA_Builder * iBuilder, bool CountOnly){
     176    Type * const int64ty = iBuilder->getInt64Ty();
     177    Type * const inputType = PointerType::get(int64ty, 1);
     178    Type * const resultTy = iBuilder->getVoidTy();
     179    Function * kernelFunc = cast<Function>(m->getOrInsertFunction("GPU_Main", resultTy, inputType, inputType, inputType, nullptr));
     180    kernelFunc->setCallingConv(CallingConv::C);
     181    Function::arg_iterator args = kernelFunc->arg_begin();
     182
     183    Value * const inputPtr = &*(args++);
     184    inputPtr->setName("inputPtr");
     185    Value * const bufferSizePtr = &*(args++);
     186    bufferSizePtr->setName("bufferSizePtr");
     187    Value * const outputPtr = &*(args++);
     188    outputPtr->setName("resultPtr");
     189
     190    BasicBlock * entryBlock = BasicBlock::Create(m->getContext(), "entry", kernelFunc, 0);
     191    iBuilder->SetInsertPoint(entryBlock);
     192
     193    Function * tidFunc = m->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
     194    Value * id = iBuilder->CreateCall(tidFunc);
     195
     196    Function * mainFunc = m->getFunction("Main");
     197    Value * inputThreadPtr = iBuilder->CreateGEP(inputPtr, iBuilder->CreateMul(id, iBuilder->getInt32(8)));
     198    Type * const inputStreamType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), 8), 1), 1);
     199    Value * inputStream = iBuilder->CreateBitCast(inputThreadPtr, inputStreamType);   
     200    Value * bufferSize = iBuilder->CreateLoad(bufferSizePtr);
     201    if (CountOnly){
     202        Value * outputThreadPtr = iBuilder->CreateGEP(outputPtr, id);
     203        Value * result = iBuilder->CreateCall(mainFunc, {inputStream, bufferSize});
     204        iBuilder->CreateStore(result, outputThreadPtr);
     205    }
     206    else {
     207        Value * outputThreadPtr = iBuilder->CreateGEP(outputPtr, iBuilder->CreateMul(id, iBuilder->getInt32(2)));
     208        Type * const outputStremType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 2), 1);
     209        Value * outputStream = iBuilder->CreateBitCast(outputThreadPtr, outputStremType);
     210        iBuilder->CreateCall(mainFunc, {inputStream, bufferSize, outputStream});
     211    }   
     212
     213    iBuilder->CreateRetVoid();
     214    return kernelFunc;
     215}
     216
     217Function * generateCPUKernel(Module * m, IDISA::IDISA_Builder * iBuilder, bool isNameExpression){
    137218    Type * const size_ty = iBuilder->getSizeTy();
    138219    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    139     Type * const voidTy = Type::getVoidTy(M->getContext());   
    140     Type * const voidPtrTy = TypeBuilder<void *, false>::get(M->getContext());
    141     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), (UTF_16 ? 16 : 8)), 1), 0);
    142     Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
    143     Function * const mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
    144     mainFn->setCallingConv(CallingConv::C);
    145     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    146     Function::arg_iterator args = mainFn->arg_begin();
    147    
     220    Type * const rsltType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 2), 0);
     221    Function * const mainCPUFn = cast<Function>(m->getOrInsertFunction("CPU_Main", iBuilder->getVoidTy(), rsltType, int8PtrTy, size_ty, size_ty, nullptr));
     222    mainCPUFn->setCallingConv(CallingConv::C);
     223    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", mainCPUFn, 0));
     224    Function::arg_iterator args = mainCPUFn->arg_begin();
     225   
     226    Value * const rsltStream = &*(args++);
     227    rsltStream->setName("rslt");
    148228    Value * const inputStream = &*(args++);
    149229    inputStream->setName("input");
     
    152232    Value * const fileIdx = &*(args++);
    153233    fileIdx->setName("fileIdx");
     234
     235    ExternalFileBuffer MatchResults(iBuilder, StreamSetType(2, i1));
     236    MatchResults.setStreamSetBuffer(rsltStream, fileSize);
     237
     238    kernel::scanMatchKernel scanMatchK(iBuilder, isNameExpression);
     239    scanMatchK.generateKernel({&MatchResults}, {});
     240           
     241    Value * scanMatchInstance = scanMatchK.createInstance({inputStream, fileSize, fileIdx});
     242   
     243    generatePipelineLoop(iBuilder, {&scanMatchK}, {scanMatchInstance}, fileSize);
     244    iBuilder->CreateRetVoid();
     245
     246    return mainCPUFn;
     247}
     248
     249void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, bool isNameExpression) {
     250    isUTF_16 = UTF_16;
     251    int addrSpace = 0;
     252    bool CPU_Only = true;
     253
     254    Module * cpuM = new Module(moduleName+":cpu", getGlobalContext());
     255    IDISA::IDISA_Builder * CPUBuilder = IDISA::GetIDISA_Builder(cpuM);
     256    Module * M = cpuM; 
     257    IDISA::IDISA_Builder * iBuilder = CPUBuilder;
     258
     259#ifdef CUDA_ENABLED
     260    setNVPTXOption();
     261    if(codegen::NVPTX){     
     262        Module * gpuM = new Module(moduleName+":gpu", getGlobalContext());
     263        IDISA::IDISA_Builder * GPUBuilder = IDISA::GetIDISA_GPU_Builder(gpuM);
     264        M = gpuM;
     265        iBuilder = GPUBuilder;
     266        M->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
     267        M->setTargetTriple("nvptx64-nvidia-cuda");
     268        addrSpace = 1;
     269        CPU_Only = false;
     270    }   
     271#endif
     272
     273    const unsigned segmentSize = codegen::SegmentSize;
     274    const unsigned bufferSegments = codegen::BufferSegments;
     275
     276    unsigned encodingBits = UTF_16 ? 16 : 8;
     277
     278    mIsNameExpression = isNameExpression;
     279
     280    Type * const int32ty = iBuilder->getInt32Ty();
     281    Type * const size_ty = iBuilder->getSizeTy();
     282    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
     283    Type * const voidTy = Type::getVoidTy(M->getContext());   
     284    Type * const voidPtrTy = TypeBuilder<void *, false>::get(M->getContext());
     285    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), (UTF_16 ? 16 : 8)), 1), addrSpace);
     286    Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
     287
     288    Function * mainFn = nullptr;
     289    Value * inputStream = nullptr;
     290    Value * fileSize = nullptr;
     291    Value * fileIdx = nullptr;
     292
     293#ifdef CUDA_ENABLED   
     294    Value * outputStream = nullptr;
     295    Type * const outputType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 2), addrSpace);
     296    if (codegen::NVPTX){
     297        if (CountOnly){
     298            mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, nullptr));
     299            mainFn->setCallingConv(CallingConv::C);
     300            iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     301            Function::arg_iterator args = mainFn->arg_begin();
     302           
     303            inputStream = &*(args++);
     304            inputStream->setName("input");
     305            fileSize = &*(args++);
     306            fileSize->setName("fileSize");
     307        }
     308        else{
     309            mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, outputType, nullptr));
     310            mainFn->setCallingConv(CallingConv::C);
     311            iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     312            Function::arg_iterator args = mainFn->arg_begin();
     313           
     314            inputStream = &*(args++);
     315            inputStream->setName("input");
     316            fileSize = &*(args++);
     317            fileSize->setName("fileSize");
     318            outputStream = &*(args++);
     319            outputStream->setName("output");
     320        }
     321    }
     322#endif
     323    if (CPU_Only){
     324        mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));   
     325        mainFn->setCallingConv(CallingConv::C);
     326        iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     327        Function::arg_iterator args = mainFn->arg_begin();
     328       
     329        inputStream = &*(args++);
     330        inputStream->setName("input");
     331        fileSize = &*(args++);
     332        fileSize->setName("fileSize");
     333        fileIdx = &*(args++);
     334        fileIdx->setName("fileIdx");
     335    }
    154336       
    155337    ExternalFileBuffer ByteStream(iBuilder, StreamSetType(1, i8));
     
    207389    }
    208390    else {
    209         CircularBuffer MatchResults(iBuilder, StreamSetType(2, i1), segmentSize * bufferSegments);
    210         MatchResults.allocateBuffer();
    211 
    212         pablo::PabloKernel  icgrepK(iBuilder, "icgrep", function, {});
    213         icgrepK.generateKernel({&BasisBits},  {&MatchResults});
    214         Value * icgrepInstance = icgrepK.createInstance({});
    215 
    216         kernel::scanMatchKernel scanMatchK(iBuilder, mIsNameExpression);
    217         scanMatchK.generateKernel({&MatchResults}, {});               
    218         Value * scanMatchInstance = scanMatchK.createInstance({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
    219 
    220         if (pipelineParallel){
    221             generatePipelineParallel(iBuilder, {&s2pk, &icgrepK, &scanMatchK}, {s2pInstance, icgrepInstance, scanMatchInstance});
    222         }
    223         else{
    224             generatePipelineLoop(iBuilder, {&s2pk, &icgrepK, &scanMatchK}, {s2pInstance, icgrepInstance, scanMatchInstance}, fileSize);
     391#ifdef CUDA_ENABLED
     392        if (codegen::NVPTX){
     393            ExternalFileBuffer MatchResults(iBuilder, StreamSetType(2, i1), addrSpace);
     394            MatchResults.setStreamSetBuffer(outputStream, fileSize);
     395
     396            pablo::PabloKernel  icgrepK(iBuilder, "icgrep", function, {});
     397            icgrepK.generateKernel({&BasisBits},  {&MatchResults});
     398            Value * icgrepInstance = icgrepK.createInstance({});
     399
     400            generatePipelineLoop(iBuilder, {&s2pk, &icgrepK}, {s2pInstance, icgrepInstance}, fileSize);
     401
     402        }
     403#endif
     404        if (CPU_Only){
     405            CircularBuffer MatchResults(iBuilder, StreamSetType(2, i1), segmentSize * bufferSegments);
     406            MatchResults.allocateBuffer();
     407
     408            pablo::PabloKernel  icgrepK(iBuilder, "icgrep", function, {});
     409            icgrepK.generateKernel({&BasisBits},  {&MatchResults});
     410            Value * icgrepInstance = icgrepK.createInstance({});
     411
     412            kernel::scanMatchKernel scanMatchK(iBuilder, mIsNameExpression);
     413            scanMatchK.generateKernel({&MatchResults}, {});               
     414            Value * scanMatchInstance = scanMatchK.createInstance({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
     415
     416            if (pipelineParallel){
     417                generatePipelineParallel(iBuilder, {&s2pk, &icgrepK, &scanMatchK}, {s2pInstance, icgrepInstance, scanMatchInstance});
     418            }
     419            else{
     420                generatePipelineLoop(iBuilder, {&s2pk, &icgrepK, &scanMatchK}, {s2pInstance, icgrepInstance, scanMatchInstance}, fileSize);
     421            }
    225422        }
    226423
     
    228425
    229426    }
     427
     428#ifdef CUDA_ENABLED
     429    Function * mainCPUFn = nullptr;
     430    if(codegen::NVPTX){
     431        Function * kernelFunction = generateGPUKernel(M, iBuilder, CountOnly);
     432        MDNode * Node = MDNode::get(M->getContext(),
     433                                    {llvm::ValueAsMetadata::get(kernelFunction),
     434                                     MDString::get(M->getContext(), "kernel"),
     435                                     ConstantAsMetadata::get(ConstantInt::get(iBuilder->getInt32Ty(), 1))});
     436        NamedMDNode *NMD = M->getOrInsertNamedMetadata("nvvm.annotations");
     437        NMD->addOperand(Node);
     438   
     439        Compile2PTX(M, IRFilename, PTXFilename);
     440        mainCPUFn = generateCPUKernel(cpuM, CPUBuilder, mIsNameExpression);
     441        if (CountOnly) return;
     442    }
     443#endif
     444   
    230445     
    231     mEngine = JIT_to_ExecutionEngine(M);
     446    mEngine = JIT_to_ExecutionEngine(cpuM);
    232447    ApplyObjectCache(mEngine);
    233     icgrep_Linking(M, mEngine);
     448    icgrep_Linking(cpuM, mEngine);
    234449
    235450#ifndef NDEBUG
     
    243458        mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
    244459    } else {
    245         mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
     460#ifdef CUDA_ENABLED
     461        if(codegen::NVPTX){
     462            mGrepFunction_CPU = reinterpret_cast<GrepFunctionType_CPU>(mEngine->getPointerToFunction(mainCPUFn));
     463        }
     464#endif
     465        if (CPU_Only) {
     466            mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
     467        }
    246468    }
    247469
Note: See TracChangeset for help on using the changeset viewer.