Changeset 5151 for icGREP/icgrep-devel


Timestamp:
Sep 8, 2016, 3:24:31 PM (3 years ago)
Author:
lindanl
Message:

Add NVPTX Arch to the framework. Fix directory bug in make check.

Location:
icGREP/icgrep-devel/icgrep
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5109 r5151  
    3939add_definitions( ${LLVM_DEFINITIONS} )
    4040
     41option(ENABLE_CUDA_COMPILE "Compiling with CUDA")
     42set(CUDA_LIB "")
     43if(ENABLE_CUDA_COMPILE)
     44  find_package(CUDA)
     45  if(CUDA_FOUND)
     46    set(CUDA_INCLUDE ${CUDA_INCLUDE} "/usr/local/cuda-7.5/include")
     47    include_directories(${CUDA_INCLUDE})
     48    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUDA_ENABLED")
     49    SET(CUDA_LIB "cuda")
     50  endif(CUDA_FOUND)
     51endif()
     52
    4153# Let's suppose we want to build a JIT compiler with support for
    4254# binary code (no interpreter):
    43 llvm_map_components_to_libnames(REQ_LLVM_LIBRARIES mcjit native IRReader Linker) # ipo
     55set(LLVM_ALL_TARGETS AArch64 AMDGPU ARM BPF CppBackend Hexagon Mips MSP430 NVPTX PowerPC Sparc SystemZ X86 XCore)
     56llvm_map_components_to_libnames(REQ_LLVM_LIBRARIES ${LLVM_ALL_TARGETS} mcjit native IRReader Linker)
    4457
    4558message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION} in ${LLVM_ROOT_DIR}")
     
    5568SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_BOOST")
    5669
    57 SET(IDISA_SRC IDISA/idisa_builder.cpp IDISA/idisa_avx_builder.cpp IDISA/idisa_i64_builder.cpp IDISA/idisa_sse_builder.cpp IDISA/idisa_target.cpp)
     70SET(IDISA_SRC IDISA/idisa_builder.cpp IDISA/idisa_avx_builder.cpp IDISA/idisa_i64_builder.cpp IDISA/idisa_sse_builder.cpp IDISA/idisa_nvptx_builder.cpp IDISA/idisa_target.cpp)
    5871
    5972SET(PABLO_SRC pablo/pabloAST.cpp pablo/ps_if.cpp pablo/ps_while.cpp pablo/function.cpp pablo/codegenstate.cpp pablo/builder.cpp pablo/symbol_generator.cpp pablo/printer_pablos.cpp pablo/pablo_toolchain.cpp pablo/passes/flattenif.cpp)
     
    132145target_link_libraries(wc ${Boost_LIBRARIES})
    133146
    134 target_link_libraries (icgrep UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES})
    135 #target_link_libraries (symtbl UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES})
    136 target_link_libraries (u8u16 UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES})
    137 target_link_libraries (wc UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES})
     147target_link_libraries (icgrep UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${CUDA_LIB})
     148#target_link_libraries (symtbl UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${CUDA_LIB})
     149target_link_libraries (u8u16 UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${CUDA_LIB})
     150target_link_libraries (wc UCDlib PabloADT RegExpCompiler CCADT CodeGen ${REQ_LLVM_LIBRARIES} ${CUDA_LIB})
    138151
    139152IF(ENABLE_MULTIPLEXING)
     
    209222  NAME abc_test
    210223  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/../QA
    211   COMMAND ./run_abc ../icgrep-build/icgrep)
     224  COMMAND ./run_abc ${CMAKE_BINARY_DIR}/icgrep)
    212225
    213226add_custom_target (abc_test
  • icGREP/icgrep-devel/icgrep/IDISA/CudaDriver.h

    r5129 r5151  
    1515
    1616/// main - Program entry point
    17 int RunPTX(std::string PTXFilename, char * fileBuffer, ulong filesize) {
     17ulong * RunPTX(std::string PTXFilename, char * fileBuffer, ulong filesize, bool CountOnly) {
    1818 
    1919  CUdevice    device;
     
    3737  if (devMajor < 2) {
    3838    std::cerr << "ERROR: Device 0 is not SM 2.0 or greater\n";
    39     return 1;
     39    exit(-1);
    4040  }
    4141
     
    5555
    5656  // Get kernel function
    57   checkCudaErrors(cuModuleGetFunction(&function, cudaModule, "kernel"));
     57  checkCudaErrors(cuModuleGetFunction(&function, cudaModule, "GPU_Main"));
    5858
    5959  // Device data
     
    6363
    6464  int groupSize = GROUPTHREADS * sizeof(ulong) * 8;
    65   int bufferSize = (filesize/groupSize + 1) * groupSize;
     65  int groups = filesize/groupSize + 1;
     66  int bufferSize = groups * groupSize;
     67  int outputSize = 0;
    6668
    6769  checkCudaErrors(cuMemAlloc(&devBufferInput, bufferSize));
    68   // checkCudaErrors(cuMemsetD8(devBufferInput, 0, bufferSize));
    6970  checkCudaErrors(cuMemAlloc(&devBufferSize, sizeof(ulong)));
    70   checkCudaErrors(cuMemAlloc(&devBufferOutput, sizeof(ulong)*GROUPTHREADS));
     71  if (CountOnly){
     72    outputSize = sizeof(ulong) * GROUPTHREADS;
     73  }
     74  else{
     75    outputSize = sizeof(ulong) * 2 * GROUPTHREADS * groups;
     76  }
     77
     78  checkCudaErrors(cuMemAlloc(&devBufferOutput, outputSize));
    7179
    7280  //Copy from host to device
     
    92100  // std::cout << "kernel success.\n";
    93101  // Retrieve device data
    94   ulong * matchCount = (ulong *) malloc(sizeof(ulong)*GROUPTHREADS);
    95   checkCudaErrors(cuMemcpyDtoH(matchCount, devBufferOutput, sizeof(ulong)*GROUPTHREADS));
    96102
    97   int count = 0;
    98   for (unsigned i = 0; i < GROUPTHREADS; ++i) {
    99     count += matchCount[i];
    100     // std::cout << i << ":" << matchCount[i] << "\n";
     103  ulong * matchRslt = (ulong *) malloc(outputSize);
     104  checkCudaErrors(cuMemcpyDtoH(matchRslt, devBufferOutput, outputSize));
     105  if (CountOnly){
     106    int count = 0;
     107    for (unsigned i = 0; i < GROUPTHREADS; ++i) {
     108      count += matchRslt[i];
     109    }
     110    std::cout << count << "\n";
    101111  }
    102   std::cout << count << "\n";
     112
    103113
    104114  // Clean-up
     
    109119  checkCudaErrors(cuCtxDestroy(context));
    110120
    111   return 0;
     121  return matchRslt;
    112122}
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_target.cpp

    r5108 r5151  
    88#include <IDISA/idisa_sse_builder.h>
    99#include <IDISA/idisa_i64_builder.h>
     10#include <IDISA/idisa_nvptx_builder.h>
    1011
    1112namespace IDISA {
     
    3637}
    3738
     39IDISA_Builder * GetIDISA_GPU_Builder(Module * mod) {
     40    return new IDISA_NVPTX20_Builder(mod, 64);
    3841}
     42
     43}
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_target.h

    r5033 r5151  
    1515IDISA::IDISA_Builder * GetIDISA_Builder(Module * m);
    1616
     17IDISA::IDISA_Builder * GetIDISA_GPU_Builder(Module * m);
     18
    1719}
    1820
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5142 r5151  
    2424#include <llvm/IR/Verifier.h>
    2525#include <llvm/IR/TypeBuilder.h>
     26#include <llvm/IR/InlineAsm.h>
    2627#include <UCD/UnicodeNameData.h>
    2728
     
    6970#include <kernels/kernel.h>
    7071
     72#ifdef CUDA_ENABLED
     73#include <IDISA/CudaDriver.h>
     74#endif
     75
    7176static cl::OptionCategory bGrepOutputOptions("Output Options",
    7277                                             "These options control the output.");
     
    8489
    8590bool isUTF_16 = false;
     91std::string IRFilename = "icgrep.ll";
     92std::string PTXFilename = "icgrep.ptx";
    8693
    8794void GrepEngine::doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly, bool UTF_16) {
     
    101108            boost::iostreams::mapped_file_source source(fileName, fileSize, 0);
    102109            char * fileBuffer = const_cast<char *>(source.data());
     110#ifdef CUDA_ENABLED 
     111            if(codegen::NVPTX){
     112                ulong * rslt = RunPTX(PTXFilename, fileBuffer, fileSize, CountOnly);
     113                if (CountOnly){
     114                    exit(0);
     115                }
     116                else{
     117                    mGrepFunction_CPU((char *)rslt, fileBuffer, fileSize, fileIdx);
     118                    return;
     119                }
     120               
     121            }
     122#endif
    103123            if (CountOnly) {
    104124                total_CountOnly[fileIdx] = mGrepFunction_CountOnly(fileBuffer, fileSize, fileIdx);
     
    111131        }
    112132    } else {
     133#ifdef CUDA_ENABLED
     134        if (codegen::NVPTX){
     135            std::cout << 0 << std::endl;
     136            exit(0);
     137        }
     138#endif
    113139        if (CountOnly) {
    114140            total_CountOnly[fileIdx] = mGrepFunction_CountOnly(nullptr, 0, fileIdx);
     
    121147using namespace parabix;
    122148
    123 void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, bool isNameExpression) {
    124     isUTF_16 = UTF_16;
    125     Module * M = new Module(moduleName, getGlobalContext());
    126    
    127     IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);
    128 
    129     const unsigned segmentSize = codegen::SegmentSize;
    130     const unsigned bufferSegments = codegen::BufferSegments;
    131 
    132     unsigned encodingBits = UTF_16 ? 16 : 8;
    133 
    134     mIsNameExpression = isNameExpression;
    135 
     149void createBallotFunction(Module * m, IDISA::IDISA_Builder * iBuilder){
    136150    Type * const int32ty = iBuilder->getInt32Ty();
     151    Type * const int1ty = iBuilder->getInt1Ty();
     152    Function * const ballotFn = cast<Function>(m->getOrInsertFunction("ballot_nvptx", int32ty, int1ty, nullptr));
     153    ballotFn->setCallingConv(CallingConv::C);
     154    Function::arg_iterator args = ballotFn->arg_begin();
     155
     156    Value * const input = &*(args++);
     157    input->setName("input");
     158
     159    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", ballotFn, 0));
     160
     161    Value * conv = iBuilder->CreateZExt(input, int32ty);
     162
     163    std::ostringstream AsmStream;
     164    AsmStream << "{.reg .pred %p1; ";
     165    AsmStream << "setp.ne.u32 %p1, $1, 0; ";
     166    AsmStream << "vote.ballot.b32  $0, %p1;}";
     167    FunctionType * AsmFnTy = FunctionType::get(int32ty, int32ty, false);
     168    llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFnTy, AsmStream.str(), "=r,r", true, false);
     169    llvm::CallInst * result = iBuilder->CreateCall(IA, conv);
     170    result->addAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::NoUnwind);
     171
     172    iBuilder->CreateRet(result);
     173}
     174
     175Function * generateGPUKernel(Module * m, IDISA::IDISA_Builder * iBuilder, bool CountOnly){
     176    Type * const int64ty = iBuilder->getInt64Ty();
     177    Type * const inputType = PointerType::get(int64ty, 1);
     178    Type * const resultTy = iBuilder->getVoidTy();
     179    Function * kernelFunc = cast<Function>(m->getOrInsertFunction("GPU_Main", resultTy, inputType, inputType, inputType, nullptr));
     180    kernelFunc->setCallingConv(CallingConv::C);
     181    Function::arg_iterator args = kernelFunc->arg_begin();
     182
     183    Value * const inputPtr = &*(args++);
     184    inputPtr->setName("inputPtr");
     185    Value * const bufferSizePtr = &*(args++);
     186    bufferSizePtr->setName("bufferSizePtr");
     187    Value * const outputPtr = &*(args++);
     188    outputPtr->setName("resultPtr");
     189
     190    BasicBlock * entryBlock = BasicBlock::Create(m->getContext(), "entry", kernelFunc, 0);
     191    iBuilder->SetInsertPoint(entryBlock);
     192
     193    Function * tidFunc = m->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
     194    Value * id = iBuilder->CreateCall(tidFunc);
     195
     196    Function * mainFunc = m->getFunction("Main");
     197    Value * inputThreadPtr = iBuilder->CreateGEP(inputPtr, iBuilder->CreateMul(id, iBuilder->getInt32(8)));
     198    Type * const inputStreamType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), 8), 1), 1);
     199    Value * inputStream = iBuilder->CreateBitCast(inputThreadPtr, inputStreamType);   
     200    Value * bufferSize = iBuilder->CreateLoad(bufferSizePtr);
     201    if (CountOnly){
     202        Value * outputThreadPtr = iBuilder->CreateGEP(outputPtr, id);
     203        Value * result = iBuilder->CreateCall(mainFunc, {inputStream, bufferSize});
     204        iBuilder->CreateStore(result, outputThreadPtr);
     205    }
     206    else {
     207        Value * outputThreadPtr = iBuilder->CreateGEP(outputPtr, iBuilder->CreateMul(id, iBuilder->getInt32(2)));
     208        Type * const outputStremType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 2), 1);
     209        Value * outputStream = iBuilder->CreateBitCast(outputThreadPtr, outputStremType);
     210        iBuilder->CreateCall(mainFunc, {inputStream, bufferSize, outputStream});
     211    }   
     212
     213    iBuilder->CreateRetVoid();
     214    return kernelFunc;
     215}
     216
     217Function * generateCPUKernel(Module * m, IDISA::IDISA_Builder * iBuilder, bool isNameExpression){
    137218    Type * const size_ty = iBuilder->getSizeTy();
    138219    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    139     Type * const voidTy = Type::getVoidTy(M->getContext());   
    140     Type * const voidPtrTy = TypeBuilder<void *, false>::get(M->getContext());
    141     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), (UTF_16 ? 16 : 8)), 1), 0);
    142     Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
    143     Function * const mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
    144     mainFn->setCallingConv(CallingConv::C);
    145     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    146     Function::arg_iterator args = mainFn->arg_begin();
    147    
     220    Type * const rsltType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 2), 0);
     221    Function * const mainCPUFn = cast<Function>(m->getOrInsertFunction("CPU_Main", iBuilder->getVoidTy(), rsltType, int8PtrTy, size_ty, size_ty, nullptr));
     222    mainCPUFn->setCallingConv(CallingConv::C);
     223    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", mainCPUFn, 0));
     224    Function::arg_iterator args = mainCPUFn->arg_begin();
     225   
     226    Value * const rsltStream = &*(args++);
     227    rsltStream->setName("rslt");
    148228    Value * const inputStream = &*(args++);
    149229    inputStream->setName("input");
     
    152232    Value * const fileIdx = &*(args++);
    153233    fileIdx->setName("fileIdx");
     234
     235    ExternalFileBuffer MatchResults(iBuilder, StreamSetType(2, i1));
     236    MatchResults.setStreamSetBuffer(rsltStream, fileSize);
     237
     238    kernel::scanMatchKernel scanMatchK(iBuilder, isNameExpression);
     239    scanMatchK.generateKernel({&MatchResults}, {});
     240           
     241    Value * scanMatchInstance = scanMatchK.createInstance({inputStream, fileSize, fileIdx});
     242   
     243    generatePipelineLoop(iBuilder, {&scanMatchK}, {scanMatchInstance}, fileSize);
     244    iBuilder->CreateRetVoid();
     245
     246    return mainCPUFn;
     247}
     248
     249void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, bool isNameExpression) {
     250    isUTF_16 = UTF_16;
     251    int addrSpace = 0;
     252    bool CPU_Only = true;
     253
     254    Module * cpuM = new Module(moduleName+":cpu", getGlobalContext());
     255    IDISA::IDISA_Builder * CPUBuilder = IDISA::GetIDISA_Builder(cpuM);
     256    Module * M = cpuM; 
     257    IDISA::IDISA_Builder * iBuilder = CPUBuilder;
     258
     259#ifdef CUDA_ENABLED
     260    setNVPTXOption();
     261    if(codegen::NVPTX){     
     262        Module * gpuM = new Module(moduleName+":gpu", getGlobalContext());
     263        IDISA::IDISA_Builder * GPUBuilder = IDISA::GetIDISA_GPU_Builder(gpuM);
     264        M = gpuM;
     265        iBuilder = GPUBuilder;
     266        M->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
     267        M->setTargetTriple("nvptx64-nvidia-cuda");
     268        addrSpace = 1;
     269        CPU_Only = false;
     270    }   
     271#endif
     272
     273    const unsigned segmentSize = codegen::SegmentSize;
     274    const unsigned bufferSegments = codegen::BufferSegments;
     275
     276    unsigned encodingBits = UTF_16 ? 16 : 8;
     277
     278    mIsNameExpression = isNameExpression;
     279
     280    Type * const int32ty = iBuilder->getInt32Ty();
     281    Type * const size_ty = iBuilder->getSizeTy();
     282    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
     283    Type * const voidTy = Type::getVoidTy(M->getContext());   
     284    Type * const voidPtrTy = TypeBuilder<void *, false>::get(M->getContext());
     285    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), (UTF_16 ? 16 : 8)), 1), addrSpace);
     286    Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
     287
     288    Function * mainFn = nullptr;
     289    Value * inputStream = nullptr;
     290    Value * fileSize = nullptr;
     291    Value * fileIdx = nullptr;
     292
     293#ifdef CUDA_ENABLED   
     294    Value * outputStream = nullptr;
     295    Type * const outputType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 2), addrSpace);
     296    if (codegen::NVPTX){
     297        if (CountOnly){
     298            mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, nullptr));
     299            mainFn->setCallingConv(CallingConv::C);
     300            iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     301            Function::arg_iterator args = mainFn->arg_begin();
     302           
     303            inputStream = &*(args++);
     304            inputStream->setName("input");
     305            fileSize = &*(args++);
     306            fileSize->setName("fileSize");
     307        }
     308        else{
     309            mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, outputType, nullptr));
     310            mainFn->setCallingConv(CallingConv::C);
     311            iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     312            Function::arg_iterator args = mainFn->arg_begin();
     313           
     314            inputStream = &*(args++);
     315            inputStream->setName("input");
     316            fileSize = &*(args++);
     317            fileSize->setName("fileSize");
     318            outputStream = &*(args++);
     319            outputStream->setName("output");
     320        }
     321    }
     322#endif
     323    if (CPU_Only){
     324        mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));   
     325        mainFn->setCallingConv(CallingConv::C);
     326        iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     327        Function::arg_iterator args = mainFn->arg_begin();
     328       
     329        inputStream = &*(args++);
     330        inputStream->setName("input");
     331        fileSize = &*(args++);
     332        fileSize->setName("fileSize");
     333        fileIdx = &*(args++);
     334        fileIdx->setName("fileIdx");
     335    }
    154336       
    155337    ExternalFileBuffer ByteStream(iBuilder, StreamSetType(1, i8));
     
    207389    }
    208390    else {
    209         CircularBuffer MatchResults(iBuilder, StreamSetType(2, i1), segmentSize * bufferSegments);
    210         MatchResults.allocateBuffer();
    211 
    212         pablo::PabloKernel  icgrepK(iBuilder, "icgrep", function, {});
    213         icgrepK.generateKernel({&BasisBits},  {&MatchResults});
    214         Value * icgrepInstance = icgrepK.createInstance({});
    215 
    216         kernel::scanMatchKernel scanMatchK(iBuilder, mIsNameExpression);
    217         scanMatchK.generateKernel({&MatchResults}, {});               
    218         Value * scanMatchInstance = scanMatchK.createInstance({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
    219 
    220         if (pipelineParallel){
    221             generatePipelineParallel(iBuilder, {&s2pk, &icgrepK, &scanMatchK}, {s2pInstance, icgrepInstance, scanMatchInstance});
    222         }
    223         else{
    224             generatePipelineLoop(iBuilder, {&s2pk, &icgrepK, &scanMatchK}, {s2pInstance, icgrepInstance, scanMatchInstance}, fileSize);
     391#ifdef CUDA_ENABLED
     392        if (codegen::NVPTX){
     393            ExternalFileBuffer MatchResults(iBuilder, StreamSetType(2, i1), addrSpace);
     394            MatchResults.setStreamSetBuffer(outputStream, fileSize);
     395
     396            pablo::PabloKernel  icgrepK(iBuilder, "icgrep", function, {});
     397            icgrepK.generateKernel({&BasisBits},  {&MatchResults});
     398            Value * icgrepInstance = icgrepK.createInstance({});
     399
     400            generatePipelineLoop(iBuilder, {&s2pk, &icgrepK}, {s2pInstance, icgrepInstance}, fileSize);
     401
     402        }
     403#endif
     404        if (CPU_Only){
     405            CircularBuffer MatchResults(iBuilder, StreamSetType(2, i1), segmentSize * bufferSegments);
     406            MatchResults.allocateBuffer();
     407
     408            pablo::PabloKernel  icgrepK(iBuilder, "icgrep", function, {});
     409            icgrepK.generateKernel({&BasisBits},  {&MatchResults});
     410            Value * icgrepInstance = icgrepK.createInstance({});
     411
     412            kernel::scanMatchKernel scanMatchK(iBuilder, mIsNameExpression);
     413            scanMatchK.generateKernel({&MatchResults}, {});               
     414            Value * scanMatchInstance = scanMatchK.createInstance({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
     415
     416            if (pipelineParallel){
     417                generatePipelineParallel(iBuilder, {&s2pk, &icgrepK, &scanMatchK}, {s2pInstance, icgrepInstance, scanMatchInstance});
     418            }
     419            else{
     420                generatePipelineLoop(iBuilder, {&s2pk, &icgrepK, &scanMatchK}, {s2pInstance, icgrepInstance, scanMatchInstance}, fileSize);
     421            }
    225422        }
    226423
     
    228425
    229426    }
     427
     428#ifdef CUDA_ENABLED
     429    Function * mainCPUFn = nullptr;
     430    if(codegen::NVPTX){
     431        Function * kernelFunction = generateGPUKernel(M, iBuilder, CountOnly);
     432        MDNode * Node = MDNode::get(M->getContext(),
     433                                    {llvm::ValueAsMetadata::get(kernelFunction),
     434                                     MDString::get(M->getContext(), "kernel"),
     435                                     ConstantAsMetadata::get(ConstantInt::get(iBuilder->getInt32Ty(), 1))});
     436        NamedMDNode *NMD = M->getOrInsertNamedMetadata("nvvm.annotations");
     437        NMD->addOperand(Node);
     438   
     439        Compile2PTX(M, IRFilename, PTXFilename);
     440        mainCPUFn = generateCPUKernel(cpuM, CPUBuilder, mIsNameExpression);
     441        if (CountOnly) return;
     442    }
     443#endif
     444   
    230445     
    231     mEngine = JIT_to_ExecutionEngine(M);
     446    mEngine = JIT_to_ExecutionEngine(cpuM);
    232447    ApplyObjectCache(mEngine);
    233     icgrep_Linking(M, mEngine);
     448    icgrep_Linking(cpuM, mEngine);
    234449
    235450#ifndef NDEBUG
     
    243458        mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
    244459    } else {
    245         mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
     460#ifdef CUDA_ENABLED
     461        if(codegen::NVPTX){
     462            mGrepFunction_CPU = reinterpret_cast<GrepFunctionType_CPU>(mEngine->getPointerToFunction(mainCPUFn));
     463        }
     464#endif
     465        if (CPU_Only) {
     466            mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
     467        }
    246468    }
    247469
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5107 r5151  
    2020    typedef void (*GrepFunctionType)(char * byte_data, size_t filesize, const int fileIdx);
    2121    typedef uint64_t (*GrepFunctionType_CountOnly)(char * byte_data, size_t filesize, const int fileIdx);
     22    typedef void (*GrepFunctionType_CPU)(char * rslt, char * byte_data, size_t filesize, const int fileIdx);
    2223public:
    2324
     
    3637    GrepFunctionType mGrepFunction;
    3738    GrepFunctionType_CountOnly mGrepFunction_CountOnly;
     39    GrepFunctionType_CPU mGrepFunction_CPU;
    3840
    3941    bool mIsNameExpression;
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r5135 r5151  
    2626
    2727#include <object_cache.h>
    28 
     28#include <IDISA/llvm2ptx.h>
     29 
    2930using namespace llvm;
    3031
     
    6162const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
    6263
    63 }
     64#ifdef CUDA_ENABLED
     65bool NVPTX;
     66static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
     67#endif
     68
     69}
     70
     71
     72#ifdef CUDA_ENABLED
     73void setNVPTXOption(){
     74    codegen::NVPTX = codegen::USENVPTX;
     75}
     76
     77void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
     78    InitializeAllTargets();
     79    InitializeAllTargetMCs();
     80    InitializeAllAsmPrinters();
     81    InitializeAllAsmParsers();
     82
     83    PassRegistry *Registry = PassRegistry::getPassRegistry();
     84    initializeCore(*Registry);
     85    initializeCodeGen(*Registry);
     86    initializeLoopStrengthReducePass(*Registry);
     87    initializeLowerIntrinsicsPass(*Registry);
     88    initializeUnreachableBlockElimPass(*Registry);
     89
     90    std::error_code error;
     91    llvm::raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
     92    m->print(out, nullptr);
     93
     94    if (LLVM_UNLIKELY(codegen::DumpGeneratedIR))
     95            m->dump();
     96
     97    llvm2ptx(IRFilename, PTXFilename);
     98}
     99#endif
    64100
    65101
  • icGREP/icgrep-devel/icgrep/toolchain.h

    r5135 r5151  
    2020extern int SegmentSize;  // set from command line
    2121extern int BufferSegments;
     22#ifdef CUDA_ENABLED
     23extern bool NVPTX;
     24#endif
     25}
    2226
    23 }
     27#ifdef CUDA_ENABLED
     28void setNVPTXOption();
     29void Compile2PTX (llvm::Module * m, std::string IRFilename, std::string PTXFilename);
     30#endif
    2431
    2532bool AVX2_available();
Note: See TracChangeset for help on using the changeset viewer.