Ignore:
Timestamp:
Nov 10, 2016, 1:42:37 PM (3 years ago)
Author:
lindanl
Message:

editd for GPU.

Location:
icGREP/icgrep-devel/icgrep
Files:
5 added
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5211 r5212  
    110110add_executable(u8u16 u8u16.cpp toolchain.cpp kernels/p2s_kernel.cpp kernels/pipeline.cpp kernels/deletion.cpp kernels/stdout_kernel.cpp)
    111111add_executable(wc wc.cpp toolchain.cpp kernels/pipeline.cpp)
    112 add_executable(editd editd/editd.cpp editd/pattern_compiler.cpp toolchain.cpp kernels/pipeline.cpp editd/editdscan_kernel.cpp)
     112add_executable(editd editd/editd.cpp editd/pattern_compiler.cpp toolchain.cpp kernels/pipeline.cpp editd/editdscan_kernel.cpp editd/editd_gpu_kernel.cpp editd/editd_cpu_kernel.cpp)
    113113
    114114IF(ENABLE_PREGENERATED_UCD_FUNCTIONS)
  • icGREP/icgrep-devel/icgrep/IDISA/CudaDriver.h

    r5151 r5212  
    101101  // Retrieve device data
    102102
    103   ulong * matchRslt = (ulong *) malloc(outputSize);
     103  ulong * matchRslt;
     104  int ret = posix_memalign((void**)&matchRslt, 32, outputSize);
     105  if (ret) {
     106    std::cerr << "Cannot allocate memory for output.\n";
     107    exit(-1);
     108  }
    104109  checkCudaErrors(cuMemcpyDtoH(matchRslt, devBufferOutput, outputSize));
    105110  if (CountOnly){
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5202 r5212  
    4444#include <boost/iostreams/device/mapped_file.hpp>
    4545#include <fcntl.h>
     46
     47#ifdef CUDA_ENABLED
     48#include <editd/EditdCudaDriver.h>
     49#include <editd/editd_gpu_kernel.h>
     50#endif
     51
    4652static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<regex> <input file ...>"), cl::OneOrMore);
    4753
     
    5460static cl::opt<int> optPosition("opt-pos", cl::desc("Optimize position"), cl::init(8));
    5561static cl::opt<int> stepSize("step-size", cl::desc("Step Size"), cl::init(3));
     62static cl::opt<int> prefixLen("prefix", cl::desc("Prefix length"), cl::init(4));
    5663
    5764using namespace kernel;
    5865using namespace pablo;
     66
     67std::string IRFilename = "editd.ll";
     68std::string PTXFilename = "editd.ptx";
    5969
    6070struct matchPosition
     
    6575
    6676std::vector<struct matchPosition> matchList;
    67 
    68 void sort_match_list(){
    69    
    70 }
     77std::vector<std::vector<std::string>> pattGroups;
    7178
    7279void run_second_filter(int total_len, int pattern_segs, float errRate){
    7380   
    7481    if(matchList.size() == 0) return;
     82
     83    //remove the duplicates
     84    bool cleared = true;
     85    while(cleared){
     86        cleared = false;
     87        for (unsigned i=0; i<matchList.size()-1; i++){
     88            if(matchList[i].pos == matchList[i+1].pos && matchList[i].dist == matchList[i+1].dist){
     89                matchList.erase(matchList.begin() + i);
     90                cleared = true;
     91            }
     92        }
     93    }
    7594
    7695    //Sort match position
     
    149168                total_len += r.size();
    150169            }
     170            std::sort(pattVector.begin(), pattVector.end());
     171            unsigned i = 0;
     172            while(i < pattVector.size()){
     173                std::vector<std::string> pattGroup;
     174                std::string prefix = pattVector[i].substr(0, prefixLen);
     175                while(i < pattVector.size() && pattVector[i].substr(0, prefixLen) == prefix){
     176                    pattGroup.push_back(pattVector[i]);
     177                    i++;
     178                }
     179                pattGroups.push_back(pattGroup);
     180            }
    151181            pattFile.close();
    152182        }
     
    212242    Type * mBitBlockType = iBuilder->getBitBlockType();
    213243   
    214     ExternalFileBuffer ByteStream(iBuilder, StreamSetType(iBuilder,1, i8));
    215     SingleBlockBuffer BasisBits(iBuilder, StreamSetType(iBuilder,8, i1));
    216     ExternalFileBuffer CCResults(iBuilder, StreamSetType(iBuilder,4, i1));
     244    ExternalFileBuffer ByteStream(iBuilder, StreamSetType(iBuilder,1, 8));
     245    SingleBlockBuffer BasisBits(iBuilder, StreamSetType(iBuilder,8, 1));
     246    ExternalFileBuffer CCResults(iBuilder, StreamSetType(iBuilder,4, 1));
    217247
    218248    s2pKernel  s2pk(iBuilder);
     
    265295
    266296typedef void (*preprocessFunctionType)(char * byte_data, size_t filesize, char * output_data);
    267 static ExecutionEngine * preprocessEngine = nullptr;
    268297
    269298preprocessFunctionType preprocessCodeGen() {
     
    272301    Module * M = new Module("preprocess", TheContext);
    273302    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
     303    ExecutionEngine * preprocessEngine = nullptr;
    274304
    275305    PabloFunction * function = PabloFunction::Create("preprocess"); // , 8, 4
     
    300330
    301331typedef void (*editdFunctionType)(char * byte_data, size_t filesize);
    302 static ExecutionEngine * editdEngine = nullptr;
    303 
    304 editdFunctionType editdCodeGen() {
     332
     333editdFunctionType editdCodeGen(std::vector<std::string> patterns) {
    305334                           
    306335    LLVMContext TheContext;
    307336    Module * M = new Module("editd", TheContext);
    308337    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
     338    ExecutionEngine * editdEngine = nullptr;
    309339
    310340    PabloFunction * function = PabloFunction::Create("editd"); // , 4, editDistance + 1
     
    321351
    322352    re::Pattern_Compiler pattern_compiler(*function);
    323     pattern_compiler.compile(pattVector, main, basisBits, editDistance, optPosition, stepSize);
     353    pattern_compiler.compile(patterns, main, basisBits, editDistance, optPosition, stepSize);
    324354
    325355    llvm::Function * main_IR = editdPipeline(M, idb, function);
     
    333363}
    334364
    335 char * preprocess(preprocessFunctionType fn_ptr, int & size) {
     365char * chStream;
     366int size;
     367
     368char * preprocess(preprocessFunctionType fn_ptr) {
    336369    std::string fileName = inputFiles[0];
    337370    size_t fileSize;
     
    362395        fileBuffer = const_cast<char *>(mappedFile.data());
    363396    }
    364     char * chStream = (char *) aligned_alloc(32, fileSize);
     397
     398    int ret = posix_memalign((void**)&chStream, 32, fileSize);
     399    if (ret) {
     400        std::cerr << "Cannot allocate memory for output.\n";
     401        exit(-1);
     402    }
     403
    365404    fn_ptr(fileBuffer, fileSize, chStream);
    366405    size = fileSize;
     
    368407    mappedFile.close();
    369408
    370     return chStream;
    371    
    372 }
    373 
    374 void editd(editdFunctionType fn_ptr, char * chStream, int size) {
     409    return chStream;   
     410}
     411
     412void editd(editdFunctionType fn_ptr, char * inputStream, int size) {
    375413 
    376414    if (size == 0) {
    377         chStream = nullptr;
    378     }
    379 
    380     fn_ptr(chStream, size);
    381    
    382 }
     415        inputStream = nullptr;
     416    }
     417
     418    fn_ptr(inputStream, size);
     419   
     420}
     421
     422#ifdef CUDA_ENABLED
     423
     424#define GROUPTHREADS 64
     425#define GROUPBLOCKS 64
     426
     // Builds the LLVM module "editd-gpu" targeting nvptx64-nvidia-cuda, defines the
     // entry function GPU_Main(input, inputSizePtr, pattStream, resultStream, stridesPtr)
     // around an editdGPUKernel pipeline loop, tags it as a kernel in nvvm.annotations,
     // and passes the module to Compile2PTX together with IRFilename and PTXFilename.
     // Returns nothing; the outcome is whatever Compile2PTX produces.
     427void editdGPUCodeGen(){ 
     428    LLVMContext TheContext;
     429    Module * M = new Module("editd-gpu", TheContext);
     430    IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_GPU_Builder(M);
     431    M->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
     432    M->setTargetTriple("nvptx64-nvidia-cuda");
         // Address space used for both stream-set buffers below; the pointer types that
         // follow spell the same value as a literal 1.
         // NOTE(review): presumably the NVPTX global address space - confirm against the NVPTX docs.
     433    unsigned addrSpace = 1;
     434
         // Parameter types of GPU_Main; every pointer lives in address space 1.
     435    Type * const mBitBlockType = iBuilder->getBitBlockType();
     436    Type * const inputSizeTy = PointerType::get(iBuilder->getSizeTy(), 1);
     437    Type * const int32ty = iBuilder->getInt32Ty();
     438    Type * const voidTy = Type::getVoidTy(M->getContext());
     439    Type * const inputTy = PointerType::get(ArrayType::get(mBitBlockType, 4), 1);
     440    Type * const patternPtrTy = PointerType::get(iBuilder->getInt8Ty(), 1);
     441    Type * const outputTy = PointerType::get(ArrayType::get(mBitBlockType, editDistance+1), 1);
     442    Type * const stridesTy = PointerType::get(int32ty, 1);
     443
         // Kernel input: a 4-stream set; kernel output: editDistance+1 streams.
     444    ExternalFileBuffer CCStream(iBuilder, StreamSetType(iBuilder, 4, 1), addrSpace);
     445    ExternalFileBuffer ResultStream(iBuilder, StreamSetType(iBuilder, editDistance+1, 1), addrSpace);
     446
         // NOTE(review): the pattern length is fixed at 19 here rather than derived from
         // the pattern data - confirm every pattern handed to the GPU path has this length.
     447    const unsigned patternLen = 19;
     448    kernel::editdGPUKernel editdk(iBuilder, editDistance, patternLen);
     449    editdk.generateKernel({&CCStream}, {&ResultStream});
     450
         // Declare GPU_Main and name its five arguments in declaration order.
     451    Function * const main = cast<Function>(M->getOrInsertFunction("GPU_Main", voidTy, inputTy, inputSizeTy, patternPtrTy, outputTy, stridesTy, nullptr));
     452    main->setCallingConv(CallingConv::C);
     453    Function::arg_iterator args = main->arg_begin();
     454   
     455    Value * const inputStream = &*(args++);
     456    inputStream->setName("input");
     457    Value * const inputSizePtr = &*(args++);
     458    inputSizePtr->setName("inputSizePtr");
     459    Value * const pattStream = &*(args++);
     460    pattStream->setName("pattStream");
     461    Value * const resultStream = &*(args++);
     462    resultStream->setName("resultStream");
     463    Value * const stridesPtr = &*(args++);
     464    stridesPtr->setName("stridesPtr");
     465   
     466    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main,0));
     467
         // The input pointer is offset by the thread index (tid.x).
         // NOTE(review): this intrinsic is looked up with getFunction (no insertion), unlike
         // ctaid.x below, so tidFunc is null unless something earlier declared it -
         // presumably generateKernel does; confirm.
     468    Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
     469    Value * tid = iBuilder->CreateCall(tidFunc);
     470    Value * inputThreadPtr = iBuilder->CreateGEP(inputStream, tid);
     471
         // The output pointer is offset by bid * (strides * GROUPTHREADS) + tid, where bid
         // is ctaid.x and strides is loaded from stridesPtr.
     472    Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32ty, nullptr));
     473    Value * bid = iBuilder->CreateCall(bidFunc);
     474    Value * strides = iBuilder->CreateLoad(stridesPtr);
     475    Value * outputBlocks = iBuilder->CreateMul(strides, ConstantInt::get(int32ty, GROUPTHREADS));
     476    Value * resultStreamPtr = iBuilder->CreateGEP(resultStream, iBuilder->CreateAdd(iBuilder->CreateMul(bid, outputBlocks), tid));
     477
         // Bind the per-thread input and output pointers to the kernel's buffers.
     478    Value * inputSize = iBuilder->CreateLoad(inputSizePtr);
     479    CCStream.setStreamSetBuffer(inputThreadPtr, inputSize);
     480    ResultStream.setEmptyBuffer(resultStreamPtr);
     481
         // Stack-allocate patternLen * (editDistance + 1) * 4 bit blocks, store zeros into
         // them, and hand the array to the kernel instance together with pattStream.
     482    const unsigned numOfCarries = patternLen * (editDistance + 1) * 4;
     483    Type * strideCarryTy = ArrayType::get(mBitBlockType, numOfCarries);
     484    Value * strideCarry = iBuilder->CreateAlloca(strideCarryTy);
     485    iBuilder->CreateStore(Constant::getNullValue(strideCarryTy), strideCarry);
     486
     487    Value * editdInstance = editdk.createInstance({pattStream, strideCarry});
     488   
     489    generatePipelineLoop(iBuilder, {&editdk}, {editdInstance}, inputSize);
     490       
     491    iBuilder->CreateRetVoid();
     492   
         // Add the (GPU_Main, "kernel", 1) entry to the module's nvvm.annotations metadata.
     493    MDNode * Node = MDNode::get(M->getContext(),
     494                                {llvm::ValueAsMetadata::get(main),
     495                                 MDString::get(M->getContext(), "kernel"),
     496                                 ConstantAsMetadata::get(ConstantInt::get(iBuilder->getInt32Ty(), 1))});
     497    NamedMDNode *NMD = M->getOrInsertNamedMetadata("nvvm.annotations");
     498    NMD->addOperand(Node);
     499
     500    Compile2PTX(M, IRFilename, PTXFilename);
     501
     502}
     503
     // Builds the LLVM module "editd" containing CPU_Main(input, fileSize), which runs an
     // editdScanKernel pipeline loop over the supplied buffer, JIT-compiles it via
     // JIT_to_ExecutionEngine, and returns the compiled entry point cast to
     // editdFunctionType. In main the returned function is applied to the merged result
     // buffer coming back from RunPTX.
     // NOTE(review): TheContext is a local while the module and engine are never released
     // here - confirm the returned function pointer stays valid after this function returns.
     504editdFunctionType editdScanCPUCodeGen() {
     505                           
     506    LLVMContext TheContext;
     507    Module * M = new Module("editd", TheContext);
     508    IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);
     509    ExecutionEngine * editdEngine = nullptr;
     510
     511    Type * mBitBlockType = iBuilder->getBitBlockType();
     512    Type * const size_ty = iBuilder->getSizeTy();
     513    Type * const voidTy = Type::getVoidTy(M->getContext());
         // NOTE(review): the input pointer type uses 4 bit blocks per element, whereas the
         // MatchResults stream set below declares editDistance+1 streams - confirm the two
         // are meant to differ.
     514    Type * const inputType = PointerType::get(ArrayType::get(mBitBlockType, 4), 0);
     515
         // The scan kernel takes MatchResults as its only input and has no output stream sets.
     516    ExternalFileBuffer MatchResults(iBuilder, StreamSetType(iBuilder, editDistance+1, 1));
     517    kernel::editdScanKernel editdScanK(iBuilder, editDistance);
     518    editdScanK.generateKernel({&MatchResults}, {});               
     519   
         // Declare CPU_Main and name its two arguments.
     520    Function * const main = cast<Function>(M->getOrInsertFunction("CPU_Main", voidTy, inputType, size_ty, nullptr));
     521    main->setCallingConv(CallingConv::C);
     522    Function::arg_iterator args = main->arg_begin();
     523   
     524    Value * const inputStream = &*(args++);
     525    inputStream->setName("input");
     526    Value * const fileSize = &*(args++);
     527    fileSize->setName("fileSize");
     528   
     529    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main,0));
     530
         // Bind the caller's buffer to MatchResults, then emit the pipeline loop over fileSize.
     531    MatchResults.setStreamSetBuffer(inputStream, fileSize);
     532    Value * scanMatchInstance = editdScanK.createInstance({});
     533   
     534    generatePipelineLoop(iBuilder, {&editdScanK}, {scanMatchInstance}, fileSize);
     535       
     536    iBuilder->CreateRetVoid();
     537
     538    editdEngine = JIT_to_ExecutionEngine(M);
     539   
     540    editdEngine->finalizeObject();
     541
         // CPU_Main is declared with (inputType, size_ty) parameters; the pointer is
         // reinterpreted as editdFunctionType, i.e. void (*)(char *, size_t).
     542    return reinterpret_cast<editdFunctionType>(editdEngine->getPointerToFunction(main));
     543}
     544
     // Merges the result words in rslt in place: the buffer is read as GROUPBLOCKS
     // consecutive groups of groupItems words, and word i of every later group is OR-ed
     // into rslt[i]. Only the first group is written. The highest index read is
     // GROUPBLOCKS * groupItems - 1, so rslt must hold at least that many words.
     // Reads the file-level `size` and `editDistance`; main passes the buffer returned
     // by RunPTX.
     545void mergeResult(ulong * rslt){
         // Bits per stride: GROUPTHREADS words of sizeof(ulong) * 8 bits each.
     546    int strideSize = GROUPTHREADS * sizeof(ulong) * 8;
         // Integer division plus one: a trailing partial stride is counted, and a size
         // that is an exact multiple of strideSize still yields one extra stride.
     547    int strides = size/strideSize + 1;
         // Words per group: one word per thread per stride for each of the
         // editDistance + 1 result streams.
     548    int groupItems = strides * GROUPTHREADS * (editDistance + 1);
     549    for(int i=0; i<groupItems; i++){
             // j starts at 1 because group 0 is the accumulation target.
     550        for(int j=1; j<GROUPBLOCKS; j++){
     551            rslt[i] = rslt[i] | rslt[j * groupItems + i];
     552        }
     553    }
     554}
     555#endif
    383556
    384557int main(int argc, char *argv[]) {
     
    390563
    391564    get_editd_pattern(pattern_segs, total_len);
    392  
     565
     566#ifdef CUDA_ENABLED
     567    codegen::BlockSize = 64;
     568#endif
     569
    393570    preprocessFunctionType preprocess_ptr = preprocessCodeGen();
    394     int size = 0;
    395     char * chStream = preprocess(preprocess_ptr, size);
    396    
    397     editdFunctionType editd_ptr = editdCodeGen();
    398     editd(editd_ptr, chStream, size);
    399 
    400     if(pattVector.size()>1)
     571    preprocess(preprocess_ptr);
     572
     573#ifdef CUDA_ENABLED 
     574    setNVPTXOption();   
     575    if(codegen::NVPTX){   
     576        editdGPUCodeGen();
     577
     578        std::ifstream t(PatternFilename);
     579        if (!t.is_open()) {
     580            std::cerr << "Error: cannot open " << PatternFilename << " for processing. Skipped.\n";
     581            exit(-1);
     582        } 
     583        std::string pattern_str((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
     584
     585        ulong * rslt = RunPTX(PTXFilename, chStream, size, pattern_str.c_str(), pattern_str.length());
     586
     587        editdFunctionType editd_ptr = editdScanCPUCodeGen();
     588
     589        mergeResult(rslt);
     590        editd(editd_ptr, (char*)rslt, size);
    401591        run_second_filter(pattern_segs, total_len, 0.15);
    402592
     593        return 0;
     594    }
     595#endif
     596   
     597    if(pattVector.size() == 1){
     598        editdFunctionType editd_ptr = editdCodeGen(pattVector);
     599        editd(editd_ptr, chStream, size);
     600    }
     601    else{
     602        for(unsigned i=0; i<pattGroups.size(); i++){
     603            editdFunctionType editd_ptr = editdCodeGen(pattGroups[i]);
     604            editd(editd_ptr, chStream, size);
     605        }
     606        run_second_filter(pattern_segs, total_len, 0.15);
     607    }
    403608
    404609    return 0;
Note: See TracChangeset for help on using the changeset viewer.