Ignore:
Timestamp:
Apr 22, 2017, 4:03:25 PM (2 years ago)
Author:
nmedfort
Message:

Removed non-functional CUDA code from icgrep and consolidated the grep and multigrep modes into a single function; allowed the segment-parallel pipeline to use the process itself as its initial thread; modified MMapSourceKernel to map the file itself by calling mmap directly and to advise the OS to drop consumed data streams.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5416 r5418  
    1111#include <llvm/Support/CommandLine.h>
    1212#include <boost/filesystem.hpp>
    13 #include <boost/iostreams/device/mapped_file.hpp>
    1413#include <IR_Gen/idisa_builder.h>
    1514#include <IR_Gen/idisa_target.h>
     
    3332#include <sstream>
    3433#include <cc/multiplex_CCs.h>
    35 
    3634#include <llvm/Support/raw_ostream.h>
     35#include <util/aligned_allocator.h>
    3736#include <sys/stat.h>
    38 
    39 
    40 #ifdef CUDA_ENABLED
    41 #include <IR_Gen/CudaDriver.h>
    42 #include "preprocess.cpp"
    43 #endif
    44 #include <util/aligned_allocator.h>
     37#include <fcntl.h>
    4538
    4639using namespace parabix;
     
    6154static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
    6255
    63 #ifdef CUDA_ENABLED
    64 const auto IRFilename = "icgrep.ll";
    65 const auto PTXFilename = "icgrep.ptx";
    66 #endif
    67 
    6856static re::CC * parsedCodePointSet = nullptr;
    6957
    7058static std::vector<std::string> parsedPropertyValues;
    7159
    72 #ifdef CUDA_ENABLED
    73 int blockNo = 0;
    74 size_t * startPoints = nullptr;
    75 size_t * accumBytes = nullptr;
    76 #endif
    77 
    78 void GrepEngine::doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly) {
    79     boost::filesystem::path file(fileName);
    80     if (exists(file)) {
    81         if (is_directory(file)) {
    82             return;
    83         }
    84     } else {
    85         if (!SilenceFileErrors) {
    86             std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
    87             return;
    88         }
    89     }
    90 
    91     const auto fileSize = file_size(file);
    92     if (fileSize > 0) {
    93         try {
    94             boost::iostreams::mapped_file_source source(fileName, fileSize, 0);
    95             char * fileBuffer = const_cast<char *>(source.data());
    96            
    97 #ifdef CUDA_ENABLED 
    98             if(codegen::NVPTX){
    99                 codegen::BlockSize = 128;
    100                 char * LineBreak;
    101                 if (posix_memalign((void**)&LineBreak, 32, fileSize)) {
    102                     std::cerr << "Cannot allocate memory for linebreak.\n";
    103                     exit(-1);
    104                 }
    105                 std::vector<size_t> LFPositions = preprocess(fileBuffer, fileSize, LineBreak);
    106 
    107                 const unsigned numOfGroups = codegen::GroupNum;
    108                 if (posix_memalign((void**)&startPoints, 8, (numOfGroups+1)*sizeof(size_t)) ||
    109                     posix_memalign((void**)&accumBytes, 8, (numOfGroups+1)*sizeof(size_t))) {
    110                     std::cerr << "Cannot allocate memory for startPoints or accumBytes.\n";
    111                     exit(-1);
    112                 }
    113 
    114                 ulong * rslt = RunPTX(PTXFilename, fileBuffer, fileSize, CountOnly, LFPositions, startPoints, accumBytes);
    115                 if (CountOnly){
    116                     exit(0);
    117                 }
    118                 else{
    119                     size_t intputSize = startPoints[numOfGroups]-accumBytes[numOfGroups]+accumBytes[numOfGroups-1];
    120                     mGrepFunction_CPU((char *)rslt, LineBreak, fileBuffer, intputSize, fileIdx);
    121                     return;
    122                 }
    123                
    124             }
    125 #endif
    126             if (CountOnly) {
    127                 total_CountOnly[fileIdx] = mGrepFunction_CountOnly(fileBuffer, fileSize, fileIdx);
    128             } else {
    129                 mGrepFunction(fileBuffer, fileSize, fileIdx);
    130             }
    131             source.close();
    132         } catch (std::exception & e) {
    133             if (!SilenceFileErrors) {
    134                 std::cerr << "Boost mmap error: " + fileName + ": " + e.what() + " Skipped.\n";
    135                 return;
    136             }
    137         }
    138     } else {
    139 #ifdef CUDA_ENABLED
    140         if (codegen::NVPTX){
    141             std::cout << 0 << std::endl;
    142             exit(0);
    143         }
    144 #endif
    145         if (CountOnly) {
    146             total_CountOnly[fileIdx] = mGrepFunction_CountOnly(nullptr, 0, fileIdx);
    147         } else {
    148             mGrepFunction(nullptr, 0, fileIdx);
    149         }
    150     }
    151 }
    152 
    153 void GrepEngine::doGrep(const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly) {
    154     if (CountOnly) {
    155         total_CountOnly[fileIdx] = mGrepFunction_CountOnly(nullptr, 0, fileIdx);
    156     } else {
    157         mGrepFunction(nullptr, 0, fileIdx);
    158     }
    159 }
    160 
    161 #ifdef CUDA_ENABLED
    162 Function * generateGPUKernel(NVPTXDriver & nvptxDriver, bool CountOnly){
    163     IDISA::IDISA_Builder * iBuilder = nvptxDriver.getIDISA_Builder();
    164     Module * m = iBuilder->getModule();
    165     Type * const int64ty = iBuilder->getInt64Ty();
    166     Type * const size_ty = iBuilder->getSizeTy();
    167     Type * const int32ty = iBuilder->getInt32Ty();
    168     Type * const sizeTyPtr = PointerType::get(size_ty, 1);
    169     Type * const int64tyPtr = PointerType::get(int64ty, 1);
    170     Type * const inputType = PointerType::get(iBuilder->getInt8Ty(), 1);
    171     Type * const resultTy = iBuilder->getVoidTy();
    172     Function * kernelFunc = cast<Function>(m->getOrInsertFunction("Main", resultTy, inputType, sizeTyPtr, sizeTyPtr, int64tyPtr, nullptr));
    173     kernelFunc->setCallingConv(CallingConv::C);
    174     Function::arg_iterator args = kernelFunc->arg_begin();
    175 
    176     Value * const inputPtr = &*(args++);
    177     inputPtr->setName("inputPtr");
    178     Value * const startPointsPtr = &*(args++);
    179     startPointsPtr->setName("startPointsPtr");
    180     Value * const bufferSizesPtr = &*(args++);
    181     bufferSizesPtr->setName("bufferSizesPtr");
    182     Value * const outputPtr = &*(args++);
    183     outputPtr->setName("resultPtr");
    184 
    185     BasicBlock * entryBlock = BasicBlock::Create(m->getContext(), "entry", kernelFunc, 0);
    186     iBuilder->SetInsertPoint(entryBlock);
    187 
    188     Function * tidFunc = m->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
    189     Value * tid = iBuilder->CreateCall(tidFunc);
    190     Function * bidFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32ty, nullptr));
    191     Value * bid = iBuilder->CreateCall(bidFunc);
    192 
    193     Value * startPoint = iBuilder->CreateLoad(iBuilder->CreateGEP(startPointsPtr, bid));
    194 
    195     Function * mainFunc = m->getFunction("Main");
    196     Value * startBlock = iBuilder->CreateUDiv(startPoint, ConstantInt::get(int64ty, iBuilder->getBitBlockWidth()));
    197     Type * const inputStreamType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), 8), 1), 1);   
    198     Value * inputStreamPtr = iBuilder->CreateGEP(iBuilder->CreateBitCast(inputPtr, inputStreamType), startBlock);
    199     Value * inputStream = iBuilder->CreateGEP(inputStreamPtr, tid);
    200     Value * bufferSize = iBuilder->CreateLoad(iBuilder->CreateGEP(bufferSizesPtr, bid));
    201 
    202     if (CountOnly) {
    203         Value * strideBlocks = ConstantInt::get(int32ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
    204         Value * outputThreadPtr = iBuilder->CreateGEP(outputPtr, iBuilder->CreateAdd(iBuilder->CreateMul(bid, strideBlocks), tid));
    205         Value * result = iBuilder->CreateCall(mainFunc, {inputStream, bufferSize});
    206         iBuilder->CreateStore(result, outputThreadPtr);
    207     } else {
    208         Type * const outputStremType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 1), 1);
    209         Value * outputStreamPtr = iBuilder->CreateGEP(iBuilder->CreateBitCast(outputPtr, outputStremType), startBlock);
    210         Value * outputStream = iBuilder->CreateGEP(outputStreamPtr, tid);
    211         iBuilder->CreateCall(mainFunc, {inputStream, bufferSize, outputStream});
    212     }   
    213 
    214     iBuilder->CreateRetVoid();
    215 
    216     return kernelFunc;
    217 }
    218 
    219 void generateCPUKernel(ParabixDriver & pxDriver, GrepType grepType){
    220     IDISA::IDISA_Builder * iBuilder = pxDriver.getIDISA_Builder();
    221     Module * m = iBuilder->getModule();
    222 
    223     Type * const size_ty = iBuilder->getSizeTy();
    224     Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    225     Type * const rsltType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 1), 0);
    226     Function * const mainCPUFn = cast<Function>(m->getOrInsertFunction("Main", iBuilder->getVoidTy(), rsltType, rsltType, int8PtrTy, size_ty, size_ty, nullptr));
    227     mainCPUFn->setCallingConv(CallingConv::C);
    228     iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", mainCPUFn, 0));
    229     Function::arg_iterator args = mainCPUFn->arg_begin();
    230    
    231     Value * const rsltStream = &*(args++);
    232     rsltStream->setName("rslt");
    233     Value * const lbStream = &*(args++);
    234     lbStream->setName("lb");
    235     Value * const inputStream = &*(args++);
    236     inputStream->setName("input");
    237     Value * const fileSize = &*(args++);
    238     fileSize->setName("fileSize");
    239     Value * const fileIdx = &*(args++);
    240     fileIdx->setName("fileIdx");
    241 
    242     const unsigned segmentSize = codegen::SegmentSize;
    243    
    244     ExternalFileBuffer InputStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
    245     InputStream.setStreamSetBuffer(inputStream);
    246 
    247     ExternalFileBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1));
    248     MatchResults.setStreamSetBuffer(rsltStream);
    249 
    250     kernel::MMapSourceKernel mmapK0(iBuilder, segmentSize);
    251     mmapK0.setName("mmap0");
    252     mmapK0.setInitialArguments({fileSize});
    253     pxDriver.addKernelCall(mmapK0, {}, {InputStream});
    254 
    255 
    256     kernel::MMapSourceKernel mmapK1(iBuilder, segmentSize);
    257     mmapK1.setName("mmap1");
    258     mmapK1.setInitialArguments({fileSize});
    259     pxDriver.addKernelCall(mmapK1, {}, {MatchResults});
    260 
    261     ExternalFileBuffer LineBreak(iBuilder, iBuilder->getStreamSetTy(1, 1));
    262     LineBreak.setStreamSetBuffer(lbStream);
    263    
    264     kernel::MMapSourceKernel mmapK2(iBuilder, segmentSize);
    265     mmapK2.setName("mmap2");
    266     mmapK2.setInitialArguments({fileSize});
    267     pxDriver.addKernelCall(mmapK2, {}, {LineBreak});
    268 
    269     kernel::ScanMatchKernel scanMatchK(iBuilder, grepType, 8);
    270     scanMatchK.setInitialArguments({fileIdx});
    271     pxDriver.addKernelCall(scanMatchK, {InputStream, MatchResults, LineBreak}, {});
    272     pxDriver.generatePipelineIR();
    273     iBuilder->CreateRetVoid();
    274 
    275     pxDriver.linkAndFinalize();
    276 }
    277 #endif
     60uint64_t GrepEngine::doGrep(const std::string & fileName, const int fileIdx) const {
     61    const int fd = open(fileName.c_str(), O_RDONLY);
     62    if (LLVM_UNLIKELY(fd == -1)) {
     63        return 0;
     64    }
     65    const auto result = doGrep(fd, fileIdx);
     66    close(fd);
     67    return result;
     68}
     69
     70uint64_t GrepEngine::doGrep(const uint32_t fileDescriptor, const int fileIdx) const {
     71    assert (mGrepFunction);
     72    typedef uint64_t (*GrepFunctionType)(size_t fileDescriptor, const int fileIdx);
     73    return reinterpret_cast<GrepFunctionType>(mGrepFunction)(fileDescriptor, fileIdx);
     74}
     75
     76void GrepEngine::doGrep(const char * buffer, const uint64_t length, const int fileIdx) const {
     77    assert (mGrepFunction);
     78    typedef uint64_t (*GrepFunctionType)(const char * buffer, const uint64_t length, const int fileIdx);
     79    reinterpret_cast<GrepFunctionType>(mGrepFunction)(buffer, length, fileIdx);
     80}
    27881
    27982static int * total_count;
     
    297100template<typename CodeUnit>
    298101void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const size_t fileIdx) {
     102
     103//    errs().write_hex((size_t)buffer) << " : " << lineNum << " (" << line_start << ", " << line_end << ", " << filesize << ")\n";
     104
    299105    assert (buffer);
    300106    assert (line_start <= line_end);
    301107    assert (line_end <= filesize);
    302 
    303   //  errs().write_hex((size_t)buffer) << " : " << lineNum << " (" << line_start << ", " << line_end << ", " << filesize << ")\n";
    304 
    305     #ifdef CUDA_ENABLED
    306     if (codegen::NVPTX){
    307         while(line_start>startPoints[blockNo]) blockNo++;
    308         line_start -= accumBytes[blockNo-1];
    309         line_end -= accumBytes[blockNo-1];
    310     }
    311     #endif
    312108
    313109    if (ShowFileNames) {
     
    416212}
    417213
    418 void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, const bool CountOnly, const bool UTF_16, const GrepType grepType, const bool usingStdIn) {
    419     int addrSpace = 0;
    420     bool CPU_Only = true;
    421     Module * M = nullptr;
    422     IDISA::IDISA_Builder * iBuilder = nullptr;
    423 
    424     #ifdef CUDA_ENABLED
    425     setNVPTXOption();
    426     if (codegen::NVPTX) {
    427         Module * gpuM = new Module(moduleName+":gpu", getGlobalContext());
    428         IDISA::IDISA_Builder * GPUBuilder = IDISA::GetIDISA_GPU_Builder(gpuM);
    429         M = gpuM;
    430         iBuilder = GPUBuilder;
    431         M->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
    432         M->setTargetTriple("nvptx64-nvidia-cuda");
    433         addrSpace = 1;
    434         CPU_Only = false;
    435         codegen::BlockSize = 64;
    436     }
    437     #endif
    438 
    439     Module * cpuM = new Module(moduleName + ":cpu", getGlobalContext());
    440     IDISA::IDISA_Builder * CPUBuilder = IDISA::GetIDISA_Builder(cpuM);
    441     if (CPU_Only) {
    442         M = cpuM;
    443         iBuilder = CPUBuilder;
    444     }
     214void GrepEngine::grepCodeGen(std::string moduleName, std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16, GrepSource grepSource, const GrepType grepType) {
     215
     216    Module * M = new Module(moduleName + ":icgrep", getGlobalContext());;
     217    IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);;
    445218    ParabixDriver pxDriver(iBuilder);
    446219
    447     // segment size made available for each call to the mmap source kernel
    448220    const unsigned segmentSize = codegen::SegmentSize;
    449221    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    450222    const unsigned encodingBits = UTF_16 ? 16 : 8;
    451223
    452     Type * const size_ty = iBuilder->getSizeTy();
    453     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), addrSpace);
    454     Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
    455 
    456     Function * mainFn = nullptr;
    457     Value * inputStream = nullptr;
    458     Value * fileSize = nullptr;
     224    Type * const int64Ty = iBuilder->getInt64Ty();
     225
     226    Function * mainFunc = nullptr;
    459227    Value * fileIdx = nullptr;
    460 
    461     #ifdef CUDA_ENABLED
    462     Value * outputStream = nullptr;
    463     Type * const outputType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 1), addrSpace);
    464     if (codegen::NVPTX){
    465         if (CountOnly){
    466             mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, nullptr));
    467             mainFn->setCallingConv(CallingConv::C);
    468             iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    469             Function::arg_iterator args = mainFn->arg_begin();
    470 
    471             inputStream = &*(args++);
    472             inputStream->setName("input");
    473             fileSize = &*(args++);
    474             fileSize->setName("fileSize");
    475         } else {
    476             mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, outputType, nullptr));
    477             mainFn->setCallingConv(CallingConv::C);
    478             iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    479             Function::arg_iterator args = mainFn->arg_begin();
    480 
    481             inputStream = &*(args++);
    482             inputStream->setName("input");
    483             fileSize = &*(args++);
    484             fileSize->setName("fileSize");
    485             outputStream = &*(args++);
    486             outputStream->setName("output");
    487         }
    488     }
    489     #endif
    490 
    491     if (CPU_Only) {
    492         mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
    493         mainFn->setCallingConv(CallingConv::C);
    494         iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    495         Function::arg_iterator args = mainFn->arg_begin();
    496 
    497         inputStream = &*(args++);
    498         inputStream->setName("input");
    499         fileSize = &*(args++);
    500         fileSize->setName("fileSize");
     228    StreamSetBuffer * ByteStream = nullptr;
     229    kernel::KernelBuilder * sourceK = nullptr;
     230
     231    if (grepSource == GrepSource::Internal) {
     232
     233        mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, iBuilder->getInt8PtrTy(), int64Ty, int64Ty, nullptr));
     234        mainFunc->setCallingConv(CallingConv::C);
     235        iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     236        Function::arg_iterator args = mainFunc->arg_begin();
     237
     238        Value * const buffer = &*(args++);
     239        buffer->setName("buffer");
     240        Value * const length = &*(args++);
     241        length->setName("length");
    501242        fileIdx = &*(args++);
    502243        fileIdx->setName("fileIdx");
    503244
    504     }
    505 
    506     StreamSetBuffer * ByteStream = nullptr;
    507     kernel::KernelBuilder * sourceK = nullptr;
    508     if (usingStdIn) {
    509         // TODO: use fstat(STDIN_FILENO) to see if we can mmap the stdin safely and avoid the calls to read
    510         ByteStream = pxDriver.addBuffer(make_unique<ExtensibleBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize));
    511         sourceK = pxDriver.addKernelInstance(make_unique<kernel::StdInKernel>(iBuilder, segmentSize));
     245        ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
     246
     247        sourceK = pxDriver.addKernelInstance(make_unique<kernel::FileSourceKernel>(iBuilder, iBuilder->getInt8PtrTy(), segmentSize));
     248        sourceK->setInitialArguments({buffer, length});
     249
    512250    } else {
    513         ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
    514         sourceK = pxDriver.addKernelInstance(make_unique<kernel::FileSourceKernel>(iBuilder, inputStream->getType(), segmentSize));
    515         sourceK->setInitialArguments({inputStream, fileSize});
    516     }
    517     pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
    518    
    519     StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
    520 
    521     kernel::KernelBuilder * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(iBuilder));
    522     pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    523 
    524     kernel::KernelBuilder * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(iBuilder, encodingBits));
    525     StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    526     pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
    527    
    528     StreamSetBuffer * MatchResults = nullptr;
    529 #ifdef CUDA_ENABLED
    530     if (codegen::NVPTX){
    531         MatchResults = pxDriver.addExternalBuffer(make_unique<ExternalFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), addrSpace), outputStream);
    532 
    533     }
    534     else {
    535 #endif
    536     MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    537 #ifdef CUDA_ENABLED
    538     }
    539 #endif
    540     kernel::KernelBuilder * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICgrepKernelBuilder>(iBuilder, re_ast));
    541     pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
    542    
    543     kernel::KernelBuilder * invertK = pxDriver.addKernelInstance(make_unique<kernel::InvertMatchesKernel>(iBuilder));
    544     if (AlgorithmOptionIsSet(re::InvertMatches)) {
    545         StreamSetBuffer * OriginalMatches = MatchResults;
    546         MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    547         pxDriver.makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {MatchResults});
    548     }
    549 
    550     if (CountOnly) {
    551         kernel::KernelBuilder * popcountK = pxDriver.addKernelInstance(make_unique<kernel::PopcountKernel>(iBuilder));
    552         pxDriver.makeKernelCall(popcountK, {MatchResults}, {});
    553         pxDriver.generatePipelineIR();
    554         iBuilder->CreateRet(popcountK->createGetAccumulatorCall("countResult"));
    555 
    556         pxDriver.linkAndFinalize();
    557 
    558     } else {
    559 
    560         #ifdef CUDA_ENABLED
    561         if (codegen::NVPTX){
    562 
    563             pxDriver.generatePipelineIR();
    564 
    565             iBuilder->CreateRetVoid();
    566 
    567             pxDriver.linkAndFinalize();
    568         }
    569         #endif
    570 
    571         if (CPU_Only) {
    572             kernel::KernelBuilder * scanMatchK = pxDriver.addKernelInstance(make_unique<kernel::ScanMatchKernel>(iBuilder, grepType, encodingBits));
    573             scanMatchK->setInitialArguments({fileIdx});
    574 
    575             pxDriver.makeKernelCall(scanMatchK, {MatchResults, LineBreakStream, ByteStream}, {});
    576 
    577             linkGrepFunction(pxDriver, grepType, UTF_16, *scanMatchK);
    578 
    579             pxDriver.generatePipelineIR();
    580 
    581             iBuilder->CreateRetVoid();
    582 
    583             pxDriver.linkAndFinalize();
    584         }
    585     }
    586 
    587     #ifdef CUDA_ENABLED
    588     if(codegen::NVPTX){
    589         NVPTXDriver nvptxDriver(iBuilder);
    590         Function * kernelFunction = generateGPUKernel(nvptxDriver, CountOnly);
    591        
    592         MDNode * Node = MDNode::get(M->getContext(),
    593                                     {llvm::ValueAsMetadata::get(kernelFunction),
    594                                      MDString::get(M->getContext(), "kernel"),
    595                                      ConstantAsMetadata::get(ConstantInt::get(iBuilder->getInt32Ty(), 1))});
    596         NamedMDNode *NMD = M->getOrInsertNamedMetadata("nvvm.annotations");
    597         NMD->addOperand(Node);
    598 
    599         Compile2PTX(M, IRFilename, PTXFilename);
    600        
    601         ParabixDriver pxDriver(CPUBuilder);
    602         generateCPUKernel(pxDriver, grepType);
    603        
    604         mGrepFunction_CPU = reinterpret_cast<GrepFunctionType_CPU>(pxDriver.getPointerToMain());
    605         if (CountOnly) return;
    606     }
    607     #endif
    608 
    609     delete iBuilder;
    610 
    611     if (CountOnly) {
    612         mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(pxDriver.getPointerToMain());
    613     } else {
    614         if (CPU_Only) {
    615             mGrepFunction = reinterpret_cast<GrepFunctionType>(pxDriver.getPointerToMain());
    616         }
    617     }
    618 }
    619 
    620 
    621 
    622 void GrepEngine::grepCodeGen(std::string moduleName, std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16, const GrepType grepType, const bool usingStdIn) {
    623 
    624     Module * M = new Module(moduleName + ":icgrep", getGlobalContext());;
    625     IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);;
    626     ParabixDriver pxDriver(iBuilder);
    627 
    628     const unsigned segmentSize = codegen::SegmentSize;
    629     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    630     const unsigned encodingBits = UTF_16 ? 16 : 8;
    631 
    632     Type * const sizeTy = iBuilder->getSizeTy();
    633     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), 0);
    634     Type * const resultTy = CountOnly ? sizeTy : iBuilder->getVoidTy();
    635 
    636     Function * mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, sizeTy, sizeTy, nullptr));
    637     mainFn->setCallingConv(CallingConv::C);
    638     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    639     Function::arg_iterator args = mainFn->arg_begin();
    640 
    641     Value * inputStream = &*(args++);
    642     inputStream->setName("input");
    643     Value * fileSize = &*(args++);
    644     fileSize->setName("fileSize");
    645     Value * fileIdx = &*(args++);
    646     fileIdx->setName("fileIdx");
    647 
    648     StreamSetBuffer * ByteStream = nullptr;
    649     kernel::KernelBuilder * sourceK = nullptr;
    650     if (usingStdIn) {
    651         // TODO: use fstat(STDIN_FILENO) to see if we can mmap the stdin safely and avoid the calls to read
    652         ByteStream = pxDriver.addBuffer(make_unique<ExtensibleBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize));
    653         sourceK = pxDriver.addKernelInstance(make_unique<kernel::StdInKernel>(iBuilder, segmentSize));
    654     } else {
    655         ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
    656         sourceK = pxDriver.addKernelInstance(make_unique<kernel::FileSourceKernel>(iBuilder, inputStream->getType(), segmentSize));
    657         sourceK->setInitialArguments({inputStream, fileSize});
    658     }
     251
     252        mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, iBuilder->getInt32Ty(), int64Ty, nullptr));
     253        mainFunc->setCallingConv(CallingConv::C);
     254        iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     255        Function::arg_iterator args = mainFunc->arg_begin();
     256
     257        Value * const fileDescriptor = &*(args++);
     258        fileDescriptor->setName("fileDescriptor");
     259        fileIdx = &*(args++);
     260        fileIdx->setName("fileIdx");
     261
     262        if (grepSource == GrepSource::File) {
     263            ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
     264            sourceK = pxDriver.addKernelInstance(make_unique<kernel::MMapSourceKernel>(iBuilder, segmentSize));
     265            sourceK->setInitialArguments({fileDescriptor});
     266        } else { // if (grepSource == GrepSource::StdIn) {
     267            ByteStream = pxDriver.addBuffer(make_unique<ExtensibleBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize));
     268            sourceK = pxDriver.addKernelInstance(make_unique<kernel::StdInKernel>(iBuilder, segmentSize));
     269        }
     270    }
     271
    659272    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
    660273    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
     
    667280    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
    668281   
    669     std::vector<StreamSetBuffer *> MatchResultsBufs;
    670 
    671     for(unsigned i = 0; i < REs.size(); ++i){
     282    const auto n = REs.size();
     283
     284    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
     285
     286    for(unsigned i = 0; i < n; ++i){
    672287        StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    673288        kernel::KernelBuilder * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICgrepKernelBuilder>(iBuilder, REs[i]));
    674289        pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
    675         MatchResultsBufs.push_back(MatchResults);
     290        MatchResultsBufs[i] = MatchResults;
    676291    }
    677292    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
     
    700315        linkGrepFunction(pxDriver, grepType, UTF_16, scanMatchK);
    701316        pxDriver.generatePipelineIR();
    702         iBuilder->CreateRetVoid();
     317        iBuilder->CreateRet(iBuilder->getInt64(0));
    703318        pxDriver.linkAndFinalize();
    704319    }
    705320
    706     //delete iBuilder;
    707 
    708     if (CountOnly) {
    709         mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(pxDriver.getPointerToMain());
    710     } else {
    711         mGrepFunction = reinterpret_cast<GrepFunctionType>(pxDriver.getPointerToMain());
    712     }
     321    mGrepFunction = pxDriver.getPointerToMain();
    713322}
    714323
     
    717326    char * mFileBuffer = getUnicodeNameDataPtr();
    718327    size_t mFileSize = getUnicodeNameDataSize();
    719     mGrepFunction(mFileBuffer, mFileSize, 0);
     328    doGrep(mFileBuffer, mFileSize, 0);
    720329    return parsedCodePointSet;
    721330}
     
    732341    std::memcpy(aligned, str.data(), n);
    733342    std::memset(aligned + n, 0, MaxSupportedVectorWidthInBytes);
    734     mGrepFunction(aligned, n, 0);
     343    doGrep(aligned, n, 0);
    735344    alloc.deallocate(aligned, 0);
    736345    return parsedPropertyValues;
     
    738347
    739348GrepEngine::GrepEngine()
    740 : mGrepFunction(nullptr)
    741 , mGrepFunction_CountOnly(nullptr)
    742 #ifdef CUDA_ENABLED
    743 , mGrepFunction_CPU(nullptr)
    744 #endif
    745 {
    746 
    747 }
     349: mGrepFunction(nullptr) {
     350
     351}
Note: See TracChangeset for help on using the changeset viewer.