Ignore:
Timestamp:
May 19, 2017, 12:58:48 PM (2 years ago)
Author:
lindanl
Message:

Add NVPTX driver.

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5457 r5458  
    5959SET(OBJECT_CACHE_SRC toolchain/object_cache.cpp)
    6060
    61 SET(TOOLCHAIN_SRC toolchain/toolchain.cpp toolchain/pipeline.cpp ${OBJECT_CACHE_SRC})
     61SET(TOOLCHAIN_SRC toolchain/toolchain.cpp  toolchain/pipeline.cpp ${OBJECT_CACHE_SRC})
    6262
    6363SET(KERNEL_SRC kernels/kernel.cpp kernels/streamset.cpp kernels/interface.cpp kernels/kernel_builder.cpp)
     
    206206
    207207SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG")
    208 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g -fsanitize=address -fno-omit-frame-pointer")
     208SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g -fno-omit-frame-pointer")
    209209
    210210add_test(
  • icGREP/icgrep-devel/icgrep/IR_Gen/CudaDriver.h

    r5425 r5458  
    5656
    5757  // Get kernel function
    58   checkCudaErrors(cuModuleGetFunction(&function, cudaModule, "GPU_Main"));
     58  checkCudaErrors(cuModuleGetFunction(&function, cudaModule, "Main"));
    5959
    6060  // Device data
     
    101101   
    102102  checkCudaErrors(cuMemAlloc(&devBufferInput, startPoints[numOfGroups]));
    103   // checkCudaErrors(cuMemsetD8(devBufferInput,0,startPoints[numOfGroups]));
     103  checkCudaErrors(cuMemsetD8(devBufferInput,0,startPoints[numOfGroups]));
    104104  checkCudaErrors(cuMemAlloc(&devStartPoints, sizeof(ulong) * (numOfGroups + 1)));
    105105  checkCudaErrors(cuMemAlloc(&devBufferSizes, sizeof(ulong) * numOfGroups));
     
    131131  void *KernelParams[] = { &devBufferInput, &devStartPoints, &devBufferSizes, &devBufferOutput};
    132132
    133   // std::cout << "Launching kernel\n";
     133  // std::cerr << "Launching kernel\n";
    134134
    135135  CUevent start;
     
    144144                                 blockSizeX, blockSizeY, blockSizeZ,
    145145                                 0, NULL, KernelParams, NULL));
    146   // std::cout << "kernel success.\n";
     146  // std::cerr << "kernel success.\n";
    147147
    148148  cuEventCreate(&stop, CU_EVENT_BLOCKING_SYNC);
     
    151151
    152152  cuEventElapsedTime(&elapsedTime, start, stop);
    153   printf("GPU Kernel time : %f ms\n" ,elapsedTime);
     153  // printf("GPU Kernel time : %f ms\n" ,elapsedTime);
    154154
    155155  // Retrieve device data
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r5446 r5458  
    107107    virtual llvm::Value * bitblock_mask_from(llvm::Value * pos);
    108108    virtual llvm::Value * bitblock_set_bit(llvm::Value * pos);
     109
     110    virtual void CreateBaseFunctions(){};
    109111   
    110112    llvm::Value * simd_and(llvm::Value * a, llvm::Value * b);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.h

    r5436 r5458  
    1616public:
    1717   
    18     IDISA_NVPTX20_Builder(llvm::LLVMContext & C, unsigned registerWidth, unsigned vectorWidth, unsigned groupSize)
    19     : IDISA_Builder(C, registerWidth, registerWidth, (vectorWidth * groupSize))
    20     , IDISA_I64_Builder(C, registerWidth, registerWidth, (vectorWidth * groupSize))
    21     , groupThreads(groupSize) {
     18    IDISA_NVPTX20_Builder(llvm::LLVMContext & C, unsigned registerWidth, unsigned vectorWidth, unsigned stride)
     19    : IDISA_Builder(C, registerWidth, registerWidth, stride)
     20    , IDISA_I64_Builder(C, registerWidth, registerWidth, stride)
     21    , groupThreads(stride/vectorWidth) {
     22
     23    }
     24
     25    ~IDISA_NVPTX20_Builder() {}
     26    virtual std::string getBuilderUniqueName() override;
     27    int getGroupThreads();
     28
     29    void CreateBaseFunctions() override {
    2230        CreateGlobals();
    2331        CreateBuiltinFunctions();
     
    2533        CreateLongAddFunc();
    2634        CreateBallotFunc();
    27     }
    28    
    29     ~IDISA_NVPTX20_Builder() {}
    30     virtual std::string getBuilderUniqueName() override;
    31     int getGroupThreads();
     35    };
    3236   
    3337    Value * bitblock_any(Value * a) override;
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_target.cpp

    r5446 r5458  
    4444
    4545KernelBuilder * GetIDISA_GPU_Builder(llvm::LLVMContext & C) {
    46     return new KernelBuilderImpl<IDISA_NVPTX20_Builder>(C, 64, 64, 64);
     46    return new KernelBuilderImpl<IDISA_NVPTX20_Builder>(C, 64, 64, 64*64);
    4747}
    4848
  • icGREP/icgrep-devel/icgrep/IR_Gen/llvm2ptx.h

    r5176 r5458  
    1 #include <string>
    2 #include <iostream>
    3 #include <fstream>
    4 #include <sys/stat.h>
    5 #include <fcntl.h>
    6 #include <unistd.h>
    7 #include <cassert>
     1#ifndef LLVM2PTX_H
     2#define LLVM2PTX_H
    83
    9 #include "llvm/ADT/Triple.h"
    104#include "llvm/Analysis/TargetLibraryInfo.h"
    115#include "llvm/CodeGen/CommandFlags.h"
    12 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
    136#include "llvm/CodeGen/MIRParser/MIRParser.h"
    14 #include "llvm/IR/DataLayout.h"
    15 #include "llvm/IR/LLVMContext.h"
    167#include "llvm/IR/LegacyPassManager.h"
    178#include "llvm/IR/Module.h"
    189#include "llvm/IR/Verifier.h"
    1910#include "llvm/IRReader/IRReader.h"
    20 #include "llvm/Pass.h"
    2111#include "llvm/Support/CommandLine.h"
    2212#include "llvm/Support/FileSystem.h"
    23 #include "llvm/Support/FormattedStream.h"
    2413#include "llvm/Support/SourceMgr.h"
    2514#include "llvm/Support/TargetRegistry.h"
     
    2716#include "llvm/Support/ToolOutputFile.h"
    2817#include "llvm/Target/TargetMachine.h"
    29 #include "llvm/Target/TargetSubtargetInfo.h"
    3018#include <memory>
    3119using namespace llvm;
     
    155143  return 0;
    156144}
     145
     146#endif
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5454 r5458  
    3535#include <sys/stat.h>
    3636#include <fcntl.h>
     37#ifdef CUDA_ENABLED
     38#include <preprocess.cpp>
     39#include <IR_Gen/CudaDriver.h>
     40#endif
    3741
    3842using namespace parabix;
     
    5963
    6064static std::vector<std::string> parsedPropertyValues;
     65
     66std::string IRFilename = "icgrep.ll";
     67std::string PTXFilename = "icgrep.ptx";
     68size_t * startPoints = nullptr;
     69size_t * accumBytes = nullptr;
     70
     71void GrepEngine::doGrep(const std::string & fileName) const{
     72#ifdef CUDA_ENABLED
     73    const bool CountOnly = true;
     74    boost::filesystem::path file(fileName);
     75    if (exists(file)) {
     76        if (is_directory(file)) {
     77            return;
     78        }
     79    } else {
     80        if (!SilenceFileErrors) {
     81            std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
     82            return;
     83        }
     84    }
     85
     86    const auto fileSize = file_size(file);
     87   
     88    if (fileSize > 0) {
     89        try {
     90            boost::iostreams::mapped_file_source source(fileName, fileSize, 0);
     91            char * fileBuffer = const_cast<char *>(source.data());
     92           
     93            codegen::BlockSize = 128;
     94            std::vector<size_t> LFPositions = preprocess(fileBuffer, fileSize);
     95           
     96            const unsigned numOfGroups = codegen::GroupNum;
     97            if (posix_memalign((void**)&startPoints, 8, (numOfGroups+1)*sizeof(size_t)) ||
     98                posix_memalign((void**)&accumBytes, 8, (numOfGroups+1)*sizeof(size_t))) {
     99                std::cerr << "Cannot allocate memory for startPoints or accumBytes.\n";
     100                exit(-1);
     101            }
     102
     103            ulong * rslt = RunPTX(PTXFilename, fileBuffer, fileSize, CountOnly, LFPositions, startPoints, accumBytes);
     104            source.close();
     105        } catch (std::exception & e) {
     106            if (!SilenceFileErrors) {
     107                std::cerr << "Boost mmap error: " + fileName + ": " + e.what() + " Skipped.\n";
     108                return;
     109            }
     110        }
     111    } else {
     112        std::cout << 0 << std::endl;
     113    }
     114#endif
     115}
    61116
    62117uint64_t GrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) const {
     
    196251    assert (line_start <= line_end);
    197252    parsedPropertyValues.emplace_back(buffer + line_start, buffer + line_end);
     253}
     254
     255void GrepEngine::grepCodeGen_nvptx(const std::string & moduleName, std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16) {
     256
     257    NVPTXDriver pxDriver(moduleName + ":icgrep");
     258    auto & idb = pxDriver.getBuilder();
     259    Module * M = idb->getModule();
     260
     261    const unsigned segmentSize = codegen::SegmentSize;
     262    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
     263    const unsigned encodingBits = UTF_16 ? 16 : 8;
     264
     265    Type * const int64Ty = idb->getInt64Ty();
     266    Type * const int32Ty = idb->getInt32Ty();
     267    Type * const size_ty = idb->getSizeTy();
     268    Type * const sizeTyPtr = PointerType::get(size_ty, 1);
     269    Type * const int64tyPtr = PointerType::get(int64Ty, 1);
     270    Type * const voidTy = idb->getVoidTy();
     271
     272    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", voidTy, int64tyPtr, sizeTyPtr, sizeTyPtr, int64tyPtr, nullptr));
     273    mainFunc->setCallingConv(CallingConv::C);
     274    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     275    auto args = mainFunc->arg_begin();
     276
     277    Value * const inputPtr = &*(args++);
     278    inputPtr->setName("inputPtr");
     279    Value * const startPointsPtr = &*(args++);
     280    startPointsPtr->setName("startPointsPtr");
     281    Value * const bufferSizesPtr = &*(args++);
     282    bufferSizesPtr->setName("bufferSizesPtr");
     283    Value * const outputPtr = &*(args++);
     284    outputPtr->setName("outputPtr");
     285
     286    Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
     287    Value * tid = idb->CreateCall(tidFunc);
     288    Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32Ty, nullptr));
     289    Value * bid = idb->CreateCall(bidFunc);
     290
     291    Value * startPoint = idb->CreateLoad(idb->CreateGEP(startPointsPtr, bid));
     292    Value * startBlock = idb->CreateUDiv(startPoint, ConstantInt::get(int64Ty, idb->getBitBlockWidth()));
     293    Type * const inputStreamType = PointerType::get(ArrayType::get(ArrayType::get(idb->getBitBlockType(), 8), 1), 1);   
     294    Value * inputStreamPtr = idb->CreateGEP(idb->CreateBitCast(inputPtr, inputStreamType), startBlock);
     295    Value * inputStream = idb->CreateGEP(inputStreamPtr, tid);
     296    Value * bufferSize = idb->CreateLoad(idb->CreateGEP(bufferSizesPtr, bid));
     297
     298    StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8), 1));
     299    kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, inputStreamType, segmentSize));
     300    sourceK->setInitialArguments({inputStream, bufferSize});
     301    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
     302
     303    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize * bufferSegments));   
     304    kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
     305    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     306 
     307    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));   
     308    kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
     309    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
     310   
     311    const auto n = REs.size();
     312
     313    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
     314
     315    for(unsigned i = 0; i < n; ++i){
     316        StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     317        kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
     318        pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
     319        MatchResultsBufs[i] = MatchResults;
     320    }
     321    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
     322    if (REs.size() > 1) {
     323        MergedResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     324        kernel::Kernel * streamsMergeK = pxDriver.addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
     325        pxDriver.makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
     326    }
     327
     328
     329    // StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     330    // kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[0]));
     331    // pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
     332
     333    kernel::MatchCount matchCountK(idb);
     334    pxDriver.addKernelCall(matchCountK, {MergedResults}, {});
     335    pxDriver.generatePipelineIR();
     336
     337    idb->setKernel(&matchCountK);
     338    Value * matchedLineCount = idb->getScalarField("matchedLineCount");
     339    matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
     340   
     341    Value * strideBlocks = ConstantInt::get(int32Ty, idb->getStride() / idb->getBitBlockWidth());
     342    Value * outputThreadPtr = idb->CreateGEP(outputPtr, idb->CreateAdd(idb->CreateMul(bid, strideBlocks), tid));
     343    idb->CreateStore(matchedLineCount, outputThreadPtr);
     344    idb->CreateRetVoid();
     345
     346    pxDriver.finalizeAndCompile(mainFunc, IRFilename, PTXFilename);
    198347}
    199348
     
    337486}
    338487
     488
    339489re::CC * GrepEngine::grepCodepoints() {
    340490    parsedCodePointSet = re::makeCC();
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5425 r5458  
    2121    void grepCodeGen(const std::string & moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16, GrepSource grepSource, GrepType grepType = GrepType::Normal);
    2222
    23     uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) const;
     23    void grepCodeGen_nvptx(const std::string & moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16);
     24
     25    void doGrep(const std::string & fileName) const;
     26
     27        uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) const;
    2428
    2529    uint64_t doGrep(const int32_t fileDescriptor, const uint32_t fileIdx) const;
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5425 r5458  
    417417
    418418    } else {
    419 
    420         grepEngine.grepCodeGen(module_name, REs, CountOnly, UTF_16, GrepSource::File);
     419       
     420        setNVPTXOption();
     421       
     422        if(codegen::NVPTX){
     423            grepEngine.grepCodeGen_nvptx(module_name, REs, CountOnly, UTF_16);
     424            for (unsigned i = 0; i != allFiles.size(); ++i) {
     425                grepEngine.doGrep(allFiles[i]);
     426            }         
     427            return 0;
     428        }
     429        else{
     430            grepEngine.grepCodeGen(module_name, REs, CountOnly, UTF_16, GrepSource::File);
     431        }
    421432
    422433        if (FileNamesOnly && NonMatchingFileNamesOnly) {
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp

    r5454 r5458  
    2929#include <sys/stat.h>
    3030#include <llvm/IR/Verifier.h>
     31#include <toolchain/NVPTXDriver.cpp>
    3132//#include <toolchain/workqueue.h>
    3233
     
    9899static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
    99100
     101bool NVPTX;
     102int GroupNum;
     103static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
     104static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
     105
     106}
     107
     108void setNVPTXOption(){
     109    codegen::NVPTX = codegen::USENVPTX;
     110    if(codegen::NVPTX){
     111#ifndef CUDA_ENABLED
     112    std::cerr << "CUDA compiler is not supported.\n";
     113    exit(-1);
     114#endif
     115    }
    100116}
    101117
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.h

    r5454 r5458  
    1212#include <kernels/streamset.h>
    1313
     14#include <toolchain/NVPTXDriver.h>
    1415namespace llvm { class ExecutionEngine; }
    1516namespace llvm { class Function; }
     
    4849extern bool EnableAsserts;
    4950extern bool EnableCycleCounter;
    50 #ifdef CUDA_ENABLED
    5151extern bool NVPTX;
    5252extern int GroupNum;
    53 #endif
    5453}
    5554
    56 #ifdef CUDA_ENABLED
     55
    5756void setNVPTXOption();
    58 void Compile2PTX (llvm::Module * m, std::string IRFilename, std::string PTXFilename);
    59 #endif
    6057
    6158void AddParabixVersionPrinter();
Note: See TracChangeset for help on using the changeset viewer.