Ignore:
Timestamp:
May 19, 2017, 12:58:48 PM (2 years ago)
Author:
lindanl
Message:

Add NVPTX driver.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5454 r5458  
    3535#include <sys/stat.h>
    3636#include <fcntl.h>
     37#ifdef CUDA_ENABLED
     38#include <preprocess.cpp>
     39#include <IR_Gen/CudaDriver.h>
     40#endif
    3741
    3842using namespace parabix;
     
    5963
    6064static std::vector<std::string> parsedPropertyValues;
     65
     // Output path handed to NVPTXDriver::finalizeAndCompile() for the generated LLVM IR.
     66std::string IRFilename = "icgrep.ll";
     // Output path for the generated PTX; the same path is later given to RunPTX() by doGrep().
     67std::string PTXFilename = "icgrep.ptx";
     // Scratch arrays of (GroupNum + 1) size_t entries, allocated in the CUDA doGrep() overload
     // and passed to RunPTX(). NOTE(review): who fills them is not visible here — confirm.
     68size_t * startPoints = nullptr;
     69size_t * accumBytes = nullptr;
     70
     71void GrepEngine::doGrep(const std::string & fileName) const{
     72#ifdef CUDA_ENABLED
     73    const bool CountOnly = true;
     74    boost::filesystem::path file(fileName);
     75    if (exists(file)) {
     76        if (is_directory(file)) {
     77            return;
     78        }
     79    } else {
     80        if (!SilenceFileErrors) {
     81            std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
     82            return;
     83        }
     84    }
     85
     86    const auto fileSize = file_size(file);
     87   
     88    if (fileSize > 0) {
     89        try {
     90            boost::iostreams::mapped_file_source source(fileName, fileSize, 0);
     91            char * fileBuffer = const_cast<char *>(source.data());
     92           
     93            codegen::BlockSize = 128;
     94            std::vector<size_t> LFPositions = preprocess(fileBuffer, fileSize);
     95           
     96            const unsigned numOfGroups = codegen::GroupNum;
     97            if (posix_memalign((void**)&startPoints, 8, (numOfGroups+1)*sizeof(size_t)) ||
     98                posix_memalign((void**)&accumBytes, 8, (numOfGroups+1)*sizeof(size_t))) {
     99                std::cerr << "Cannot allocate memory for startPoints or accumBytes.\n";
     100                exit(-1);
     101            }
     102
     103            ulong * rslt = RunPTX(PTXFilename, fileBuffer, fileSize, CountOnly, LFPositions, startPoints, accumBytes);
     104            source.close();
     105        } catch (std::exception & e) {
     106            if (!SilenceFileErrors) {
     107                std::cerr << "Boost mmap error: " + fileName + ": " + e.what() + " Skipped.\n";
     108                return;
     109            }
     110        }
     111    } else {
     112        std::cout << 0 << std::endl;
     113    }
     114#endif
     115}
    61116
    62117uint64_t GrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) const {
     
    196251    assert (line_start <= line_end);
    197252    parsedPropertyValues.emplace_back(buffer + line_start, buffer + line_end);
     253}
     254
     255void GrepEngine::grepCodeGen_nvptx(const std::string & moduleName, std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16) {
     256
     257    NVPTXDriver pxDriver(moduleName + ":icgrep");
     258    auto & idb = pxDriver.getBuilder();
     259    Module * M = idb->getModule();
     260
     261    const unsigned segmentSize = codegen::SegmentSize;
     262    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
     263    const unsigned encodingBits = UTF_16 ? 16 : 8;
     264
     265    Type * const int64Ty = idb->getInt64Ty();
     266    Type * const int32Ty = idb->getInt32Ty();
     267    Type * const size_ty = idb->getSizeTy();
     268    Type * const sizeTyPtr = PointerType::get(size_ty, 1);
     269    Type * const int64tyPtr = PointerType::get(int64Ty, 1);
     270    Type * const voidTy = idb->getVoidTy();
     271
     272    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", voidTy, int64tyPtr, sizeTyPtr, sizeTyPtr, int64tyPtr, nullptr));
     273    mainFunc->setCallingConv(CallingConv::C);
     274    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     275    auto args = mainFunc->arg_begin();
     276
     277    Value * const inputPtr = &*(args++);
     278    inputPtr->setName("inputPtr");
     279    Value * const startPointsPtr = &*(args++);
     280    startPointsPtr->setName("startPointsPtr");
     281    Value * const bufferSizesPtr = &*(args++);
     282    bufferSizesPtr->setName("bufferSizesPtr");
     283    Value * const outputPtr = &*(args++);
     284    outputPtr->setName("outputPtr");
     285
     286    Function * tidFunc = M->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
     287    Value * tid = idb->CreateCall(tidFunc);
     288    Function * bidFunc = cast<Function>(M->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32Ty, nullptr));
     289    Value * bid = idb->CreateCall(bidFunc);
     290
     291    Value * startPoint = idb->CreateLoad(idb->CreateGEP(startPointsPtr, bid));
     292    Value * startBlock = idb->CreateUDiv(startPoint, ConstantInt::get(int64Ty, idb->getBitBlockWidth()));
     293    Type * const inputStreamType = PointerType::get(ArrayType::get(ArrayType::get(idb->getBitBlockType(), 8), 1), 1);   
     294    Value * inputStreamPtr = idb->CreateGEP(idb->CreateBitCast(inputPtr, inputStreamType), startBlock);
     295    Value * inputStream = idb->CreateGEP(inputStreamPtr, tid);
     296    Value * bufferSize = idb->CreateLoad(idb->CreateGEP(bufferSizesPtr, bid));
     297
     298    StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, 8), 1));
     299    kernel::Kernel * sourceK = pxDriver.addKernelInstance(make_unique<kernel::MemorySourceKernel>(idb, inputStreamType, segmentSize));
     300    sourceK->setInitialArguments({inputStream, bufferSize});
     301    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
     302
     303    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize * bufferSegments));   
     304    kernel::Kernel * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(idb));
     305    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     306 
     307    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));   
     308    kernel::Kernel * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
     309    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
     310   
     311    const auto n = REs.size();
     312
     313    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
     314
     315    for(unsigned i = 0; i < n; ++i){
     316        StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     317        kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
     318        pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
     319        MatchResultsBufs[i] = MatchResults;
     320    }
     321    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
     322    if (REs.size() > 1) {
     323        MergedResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     324        kernel::Kernel * streamsMergeK = pxDriver.addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
     325        pxDriver.makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
     326    }
     327
     328
     329    // StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     330    // kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[0]));
     331    // pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
     332
     333    kernel::MatchCount matchCountK(idb);
     334    pxDriver.addKernelCall(matchCountK, {MergedResults}, {});
     335    pxDriver.generatePipelineIR();
     336
     337    idb->setKernel(&matchCountK);
     338    Value * matchedLineCount = idb->getScalarField("matchedLineCount");
     339    matchedLineCount = idb->CreateZExt(matchedLineCount, int64Ty);
     340   
     341    Value * strideBlocks = ConstantInt::get(int32Ty, idb->getStride() / idb->getBitBlockWidth());
     342    Value * outputThreadPtr = idb->CreateGEP(outputPtr, idb->CreateAdd(idb->CreateMul(bid, strideBlocks), tid));
     343    idb->CreateStore(matchedLineCount, outputThreadPtr);
     344    idb->CreateRetVoid();
     345
     346    pxDriver.finalizeAndCompile(mainFunc, IRFilename, PTXFilename);
    198347}
    199348
     
    337486}
    338487
     488
    339489re::CC * GrepEngine::grepCodepoints() {
    340490    parsedCodePointSet = re::makeCC();
Note: See TracChangeset for help on using the changeset viewer.