Ignore:
Timestamp:
Apr 7, 2017, 4:59:04 PM (2 years ago)
Author:
nmedfort
Message:

Continued work on processing stdin input. Partial integration of ParabixDriver methods into icgrep and editd. Object cache does not currently work for recursive REs.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5393 r5398  
    161161}
    162162
     163#ifdef CUDA_ENABLED
    163164Function * generateGPUKernel(Module * m, IDISA::IDISA_Builder * iBuilder, bool CountOnly){
    164165    Type * const int64ty = iBuilder->getInt64Ty();
     
    265266    return mainCPUFn;
    266267}
    267 
    268 void GrepEngine::multiGrepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16, GrepType grepType) {
    269 
    270     isUTF_16 = UTF_16;
    271     Module * M = new Module(moduleName + ":icgrep", getGlobalContext());; 
    272     IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);;
    273 
    274     const unsigned segmentSize = codegen::SegmentSize;
    275     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    276     const unsigned encodingBits = UTF_16 ? 16 : 8;
    277 
    278     mGrepType = grepType;
    279 
    280     Type * const sizeTy = iBuilder->getSizeTy();
    281     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), 0);
    282     Type * const resultTy = CountOnly ? sizeTy : iBuilder->getVoidTy();
    283 
    284     Function * mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, sizeTy, sizeTy, nullptr));
    285     mainFn->setCallingConv(CallingConv::C);
    286     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    287     Function::arg_iterator args = mainFn->arg_begin();
    288    
    289     Value * inputStream = &*(args++);
    290     inputStream->setName("input");
    291     Value * fileSize = &*(args++);
    292     fileSize->setName("fileSize");
    293     Value * fileIdx = &*(args++);
    294     fileIdx->setName("fileIdx");
    295 
    296     StreamSetBuffer * byteStream = nullptr;
    297     kernel::KernelBuilder * sourceK = nullptr;
    298 //    if (usingStdIn) {
    299 //        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
    300 //        cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
    301 //        sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
    302 //        sourceK->generateKernel({}, {byteStream});
    303 //    } else {
    304         byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
    305         cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
    306         sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
    307         sourceK->generateKernel({}, {byteStream});
    308         sourceK->setInitialArguments({fileSize});
    309 //    }
    310 
    311     CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    312     BasisBits.allocateBuffer();
    313 
    314     kernel::S2PKernel  s2pk(iBuilder);
    315     s2pk.generateKernel({byteStream}, {&BasisBits});
    316    
    317     std::vector<pablo::PabloKernel *> icgrepKs;
    318     std::vector<StreamSetBuffer *> MatchResultsBufs;
    319 
    320     for(unsigned i=0; i<REs.size(); i++){   
    321         pablo::PabloKernel * icgrepK = new pablo::PabloKernel(iBuilder, "icgrep"+std::to_string(i), {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
    322         re::re2pablo_compiler(icgrepK, re::regular_expression_passes(REs[i]), false);
    323         pablo_function_passes(icgrepK);
    324         icgrepKs.push_back(icgrepK);
    325         CircularBuffer * MatchResults = new CircularBuffer(iBuilder, iBuilder->getStreamSetTy(2, 1), segmentSize * bufferSegments);       
    326         MatchResults->allocateBuffer();
    327         MatchResultsBufs.push_back(MatchResults);
    328     }   
    329 
    330     std::vector<kernel::KernelBuilder *> KernelList;
    331     KernelList.push_back(sourceK);
    332     KernelList.push_back(&s2pk);
    333 
    334     CircularBuffer mergedResults(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    335     mergedResults.allocateBuffer();
    336 
    337     kernel::StreamsMerge streamsMergeK(iBuilder, 1, REs.size());
    338     streamsMergeK.generateKernel(MatchResultsBufs, {&mergedResults});
    339 
    340     kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
    341     CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    342     LineBreakStream.allocateBuffer();
    343     linebreakK.generateKernel({&BasisBits}, {&LineBreakStream});
    344    
    345     KernelList.push_back(&linebreakK);
    346     for(unsigned i=0; i<REs.size(); i++){
    347         icgrepKs[i]->generateKernel({&BasisBits, &LineBreakStream}, {MatchResultsBufs[i]});
    348         KernelList.push_back(icgrepKs[i]);
    349     }
    350     KernelList.push_back(&streamsMergeK);
    351 
    352     if (CountOnly) {
    353         kernel::MatchCount matchCountK(iBuilder);
    354         matchCountK.generateKernel({&mergedResults}, {}); 
    355 
    356         KernelList.push_back(&matchCountK); 
    357 
    358         generatePipeline(iBuilder, KernelList);
    359         iBuilder->CreateRet(matchCountK.getScalarField(matchCountK.getInstance(), "matchedLineCount"));
    360 
    361     } else {
    362         kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType, encodingBits);
    363         scanMatchK.generateKernel({byteStream, &mergedResults, &LineBreakStream}, {});
    364         scanMatchK.setInitialArguments({fileIdx});
    365 
    366         KernelList.push_back(&scanMatchK);
    367 
    368         generatePipeline(iBuilder, KernelList);
    369        
    370         iBuilder->CreateRetVoid();
    371     }
    372    
    373     mEngine = JIT_to_ExecutionEngine(M);
    374     ApplyObjectCache(mEngine);
    375     icgrep_Linking(M, mEngine);
    376 
    377 #ifndef NDEBUG
    378     verifyModule(*M, &dbgs());
    379268#endif
    380 
    381     mEngine->finalizeObject();
    382     delete iBuilder;
    383     delete sourceK;
    384     delete byteStream;
    385 
    386     if (CountOnly) {
    387         mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
    388     } else {
    389         mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
    390     }
    391 
    392 }
    393 
    394 void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, GrepType grepType, const bool usingStdIn) {
    395     isUTF_16 = UTF_16;
    396     int addrSpace = 0;
    397     bool CPU_Only = true;
    398     Module * M = nullptr; 
    399     IDISA::IDISA_Builder * iBuilder = nullptr;
    400 
    401 #ifdef CUDA_ENABLED
    402     setNVPTXOption();
    403     if (codegen::NVPTX) {
    404         Module * gpuM = new Module(moduleName+":gpu", getGlobalContext());
    405         IDISA::IDISA_Builder * GPUBuilder = IDISA::GetIDISA_GPU_Builder(gpuM);
    406         M = gpuM;
    407         iBuilder = GPUBuilder;
    408         M->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
    409         M->setTargetTriple("nvptx64-nvidia-cuda");
    410         addrSpace = 1;
    411         CPU_Only = false;
    412         codegen::BlockSize = 64;
    413     }
    414 #endif
    415 
    416     Module * cpuM = new Module(moduleName + ":cpu", getGlobalContext());
    417     IDISA::IDISA_Builder * CPUBuilder = IDISA::GetIDISA_Builder(cpuM);
    418 
    419     if (CPU_Only) {
    420         M = cpuM;
    421         iBuilder = CPUBuilder;
    422     }
    423 
    424     // segment size made availabe for each call to the mmap source kernel
    425     const unsigned segmentSize = codegen::SegmentSize;
    426     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    427     const unsigned encodingBits = UTF_16 ? 16 : 8;
    428 
    429     mGrepType = grepType;
    430 
    431     Type * const size_ty = iBuilder->getSizeTy();
    432     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), addrSpace);
    433     Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
    434 
    435     Function * mainFn = nullptr;
    436     Value * inputStream = nullptr;
    437     Value * fileSize = nullptr;
    438     Value * fileIdx = nullptr;
    439 
    440 #ifdef CUDA_ENABLED   
    441     Value * outputStream = nullptr;
    442     Type * const outputType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 1), addrSpace);
    443     if (codegen::NVPTX){
    444         if (CountOnly){
    445             mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, nullptr));
    446             mainFn->setCallingConv(CallingConv::C);
    447             iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    448             Function::arg_iterator args = mainFn->arg_begin();
    449            
    450             inputStream = &*(args++);
    451             inputStream->setName("input");
    452             fileSize = &*(args++);
    453             fileSize->setName("fileSize");
    454         } else {
    455             mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, outputType, nullptr));
    456             mainFn->setCallingConv(CallingConv::C);
    457             iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    458             Function::arg_iterator args = mainFn->arg_begin();
    459            
    460             inputStream = &*(args++);
    461             inputStream->setName("input");
    462             fileSize = &*(args++);
    463             fileSize->setName("fileSize");
    464             outputStream = &*(args++);
    465             outputStream->setName("output");
    466         }
    467     }
    468 #endif
    469     if (CPU_Only) {
    470         mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
    471         mainFn->setCallingConv(CallingConv::C);
    472         iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    473         Function::arg_iterator args = mainFn->arg_begin();
    474        
    475         inputStream = &*(args++);
    476         inputStream->setName("input");
    477         fileSize = &*(args++);
    478         fileSize->setName("fileSize");
    479         fileIdx = &*(args++);
    480         fileIdx->setName("fileIdx");
    481 
    482     } 
    483 
    484     StreamSetBuffer * byteStream = nullptr;
    485     kernel::KernelBuilder * sourceK = nullptr;
    486     if (usingStdIn) {
    487         // TODO: use fstat(STDIN_FILENO) to see if we can mmap the stdin safely and avoid the calls to read
    488 
    489         byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * bufferSegments);
    490         cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
    491         sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
    492         sourceK->generateKernel({}, {byteStream});
    493     } else {
    494         byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
    495         cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
    496         sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
    497         sourceK->generateKernel({}, {byteStream});
    498         sourceK->setInitialArguments({fileSize});
    499     }
    500    
    501     CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    502 
    503     kernel::S2PKernel  s2pk(iBuilder);
    504     s2pk.generateKernel({byteStream}, {&BasisBits});
    505    
    506     kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
    507     CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    508 
    509     linebreakK.generateKernel({&BasisBits}, {&LineBreakStream});
    510     LineBreakStream.allocateBuffer();
    511 
    512     pablo::PabloKernel icgrepK(iBuilder, "icgrep", {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
    513     re::re2pablo_compiler(&icgrepK, re::regular_expression_passes(re_ast), CountOnly);
    514     pablo_function_passes(&icgrepK);
    515 
    516 
    517     BasisBits.allocateBuffer();
    518 
    519     if (CountOnly) {
    520         icgrepK.generateKernel({&BasisBits, &LineBreakStream}, {});
    521         generatePipeline(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK});
    522         iBuilder->CreateRet(icgrepK.createGetAccumulatorCall(icgrepK.getInstance(), "matchedLineCount"));
    523     } else {
    524 #ifdef CUDA_ENABLED
    525         if (codegen::NVPTX){
    526             ExternalFileBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1), addrSpace);
    527             MatchResults.setStreamSetBuffer(outputStream);
    528 
    529             icgrepK.generateKernel({&BasisBits, &LineBreakStream},  {&MatchResults});
    530             generatePipelineLoop(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK});
    531 
    532         }
    533 #endif
    534         if (CPU_Only) {
    535             CircularBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    536             MatchResults.allocateBuffer();
    537 
    538             icgrepK.generateKernel({&BasisBits, &LineBreakStream}, {&MatchResults});
    539 
    540             kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType, encodingBits);
    541             scanMatchK.generateKernel({byteStream, &MatchResults, &LineBreakStream}, {});
    542             scanMatchK.setInitialArguments({fileIdx});
    543            
    544             generatePipeline(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK, &scanMatchK});
    545         }
    546         iBuilder->CreateRetVoid();
    547     }
    548 
    549 #ifdef CUDA_ENABLED
    550     Function * mainCPUFn = nullptr;
    551     if(codegen::NVPTX){
    552         Function * kernelFunction = generateGPUKernel(M, iBuilder, CountOnly);
    553         MDNode * Node = MDNode::get(M->getContext(),
    554                                     {llvm::ValueAsMetadata::get(kernelFunction),
    555                                      MDString::get(M->getContext(), "kernel"),
    556                                      ConstantAsMetadata::get(ConstantInt::get(iBuilder->getInt32Ty(), 1))});
    557         NamedMDNode *NMD = M->getOrInsertNamedMetadata("nvvm.annotations");
    558         NMD->addOperand(Node);
    559    
    560         Compile2PTX(M, IRFilename, PTXFilename);
    561         mainCPUFn = generateCPUKernel(cpuM, CPUBuilder, mGrepType);
    562         if (CountOnly) return;
    563     }
    564 #endif
    565 
    566 
    567     mEngine = JIT_to_ExecutionEngine(cpuM);
    568     ApplyObjectCache(mEngine);
    569     icgrep_Linking(cpuM, mEngine);
    570 
    571 #ifndef NDEBUG
    572     verifyModule(*M, &dbgs());
    573 #endif
    574 
    575     mEngine->finalizeObject();
    576     delete iBuilder;
    577     delete sourceK;
    578     delete byteStream;
    579    
    580     if (CountOnly) {
    581         mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
    582     } else {
    583 #ifdef CUDA_ENABLED
    584         if(codegen::NVPTX){
    585             mGrepFunction_CPU = reinterpret_cast<GrepFunctionType_CPU>(mEngine->getPointerToFunction(mainCPUFn));
    586         }
    587 #endif
    588         if (CPU_Only) {
    589             mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
    590         }
    591     }
    592 
    593 }
    594 
    595 re::CC * GrepEngine::grepCodepoints() {
    596     parsedCodePointSet = re::makeCC();
    597     char * mFileBuffer = getUnicodeNameDataPtr();
    598     size_t mFileSize = getUnicodeNameDataSize();
    599     mGrepFunction(mFileBuffer, mFileSize, 0);
    600     return parsedCodePointSet;
    601 }
    602 
    603 const std::vector<std::string> & GrepEngine::grepPropertyValues(const std::string& propertyName) {
    604     enum { MaxSupportedVectorWidthInBytes = 32 };
    605     AlignedAllocator<char, MaxSupportedVectorWidthInBytes> alloc;
    606     parsedPropertyValues.clear();
    607     const std::string & str = UCD::getPropertyValueGrepString(propertyName);
    608     const auto n = str.length();
    609     // NOTE: MaxSupportedVectorWidthInBytes of trailing 0s are needed to prevent the grep function from
    610     // erroneously matching garbage data when loading the final partial block.
    611     char * aligned = alloc.allocate(n + MaxSupportedVectorWidthInBytes, 0);
    612     std::memcpy(aligned, str.data(), n);
    613     std::memset(aligned + n, 0, MaxSupportedVectorWidthInBytes);
    614     mGrepFunction(aligned, n, 0);
    615     alloc.deallocate(aligned, 0);
    616     return parsedPropertyValues;
    617 }
    618269
    619270static int * total_count;
     
    632283        total_count[i] = 0;
    633284    }
    634    
     285
    635286}
    636287
    637288template<typename CodeUnit>
    638 void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const int fileIdx) {
     289void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const size_t fileIdx) {
    639290    assert (buffer);
    640291    assert (line_start <= line_end);
    641292    assert (line_end <= filesize);
    642293
    643 #ifdef CUDA_ENABLED
    644 if (codegen::NVPTX){
    645     while(line_start>startPoints[blockNo]) blockNo++;
    646     line_start -= accumBytes[blockNo-1];
    647     line_end -= accumBytes[blockNo-1];
    648 }
    649 #endif
     294    #ifdef CUDA_ENABLED
     295    if (codegen::NVPTX){
     296        while(line_start>startPoints[blockNo]) blockNo++;
     297        line_start -= accumBytes[blockNo-1];
     298        line_end -= accumBytes[blockNo-1];
     299    }
     300    #endif
    650301
    651302    if (ShowFileNames) {
     
    695346
    696347void PrintResult(bool CountOnly, std::vector<size_t> & total_CountOnly){
    697    
    698     if(CountOnly){
     348    if (CountOnly) {
    699349        if (!ShowFileNames) {
    700350            for (unsigned i = 0; i < inputFiles.size(); ++i){
    701351                std::cout << total_CountOnly[i] << std::endl;
    702352            }
    703         }
    704         else {
     353        } else {
    705354            for (unsigned i = 0; i < inputFiles.size(); ++i){
    706355                std::cout << inputFiles[i] << ':' << total_CountOnly[i] << std::endl;
    707356            };
    708357        }
    709         return;
    710     }
    711    
    712     for (unsigned i = 0; i < inputFiles.size(); ++i){
    713         std::cout << resultStrs[i].str();
     358    } else {
     359        for (unsigned i = 0; i < inputFiles.size(); ++i){
     360            std::cout << resultStrs[i].str();
     361        }
    714362    }
    715363}
     
    737385    assert (line_start <= line_end);
    738386    parsedPropertyValues.emplace_back(buffer + line_start, buffer + line_end);
     387}
     388
     389void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, GrepType grepType, const bool usingStdIn) {
     390    isUTF_16 = UTF_16;
     391    int addrSpace = 0;
     392    bool CPU_Only = true;
     393    Module * M = nullptr;
     394    IDISA::IDISA_Builder * iBuilder = nullptr;
     395
     396    #ifdef CUDA_ENABLED
     397    setNVPTXOption();
     398    if (codegen::NVPTX) {
     399        Module * gpuM = new Module(moduleName+":gpu", getGlobalContext());
     400        IDISA::IDISA_Builder * GPUBuilder = IDISA::GetIDISA_GPU_Builder(gpuM);
     401        M = gpuM;
     402        iBuilder = GPUBuilder;
     403        M->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
     404        M->setTargetTriple("nvptx64-nvidia-cuda");
     405        addrSpace = 1;
     406        CPU_Only = false;
     407        codegen::BlockSize = 64;
     408    }
     409    #endif
     410
     411    Module * cpuM = new Module(moduleName + ":cpu", getGlobalContext());
     412    IDISA::IDISA_Builder * CPUBuilder = IDISA::GetIDISA_Builder(cpuM);
     413    if (CPU_Only) {
     414        M = cpuM;
     415        iBuilder = CPUBuilder;
     416    }
     417    ParabixDriver pxDriver(iBuilder);
     418
     419    // segment size made availabe for each call to the mmap source kernel
     420    const unsigned segmentSize = codegen::SegmentSize;
     421    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
     422    const unsigned encodingBits = UTF_16 ? 16 : 8;
     423
     424    Type * const size_ty = iBuilder->getSizeTy();
     425    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), addrSpace);
     426    Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
     427
     428    Function * mainFn = nullptr;
     429    Value * inputStream = nullptr;
     430    Value * fileSize = nullptr;
     431    Value * fileIdx = nullptr;
     432
     433    #ifdef CUDA_ENABLED
     434    Value * outputStream = nullptr;
     435    Type * const outputType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 1), addrSpace);
     436    if (codegen::NVPTX){
     437        if (CountOnly){
     438            mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, nullptr));
     439            mainFn->setCallingConv(CallingConv::C);
     440            iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     441            Function::arg_iterator args = mainFn->arg_begin();
     442
     443            inputStream = &*(args++);
     444            inputStream->setName("input");
     445            fileSize = &*(args++);
     446            fileSize->setName("fileSize");
     447        } else {
     448            mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, outputType, nullptr));
     449            mainFn->setCallingConv(CallingConv::C);
     450            iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     451            Function::arg_iterator args = mainFn->arg_begin();
     452
     453            inputStream = &*(args++);
     454            inputStream->setName("input");
     455            fileSize = &*(args++);
     456            fileSize->setName("fileSize");
     457            outputStream = &*(args++);
     458            outputStream->setName("output");
     459        }
     460    }
     461    #endif
     462
     463    if (CPU_Only) {
     464        mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
     465        mainFn->setCallingConv(CallingConv::C);
     466        iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     467        Function::arg_iterator args = mainFn->arg_begin();
     468
     469        inputStream = &*(args++);
     470        inputStream->setName("input");
     471        fileSize = &*(args++);
     472        fileSize->setName("fileSize");
     473        fileIdx = &*(args++);
     474        fileIdx->setName("fileIdx");
     475
     476    }
     477
     478    StreamSetBuffer * byteStream = nullptr;
     479    kernel::KernelBuilder * sourceK = nullptr;
     480    if (usingStdIn) {
     481        // TODO: use fstat(STDIN_FILENO) to see if we can mmap the stdin safely and avoid the calls to read
     482        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize);
     483        sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
     484    } else {
     485        byteStream = new SourceFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
     486        sourceK = new kernel::FileSourceKernel(iBuilder, inputStream->getType(), segmentSize);
     487        sourceK->setInitialArguments({inputStream, fileSize});
     488    }
     489    byteStream->allocateBuffer();
     490    pxDriver.addKernelCall(*sourceK, {}, {byteStream});
     491
     492    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
     493    BasisBits.allocateBuffer();
     494
     495    kernel::S2PKernel s2pk(iBuilder);
     496    pxDriver.addKernelCall(s2pk, {byteStream}, {&BasisBits});
     497
     498    kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
     499    CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     500    LineBreakStream.allocateBuffer();
     501
     502    pxDriver.addKernelCall(linebreakK, {&BasisBits}, {&LineBreakStream});
     503
     504    pablo::PabloKernel icgrepK(iBuilder, "icgrep", {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
     505    re::re2pablo_compiler(&icgrepK, re::regular_expression_passes(re_ast), CountOnly);
     506    pablo_function_passes(&icgrepK);
     507
     508    if (CountOnly) {
     509
     510        pxDriver.addKernelCall(icgrepK, {&BasisBits, &LineBreakStream}, {});
     511
     512        pxDriver.generatePipelineIR();
     513
     514        iBuilder->CreateRet(icgrepK.createGetAccumulatorCall(icgrepK.getInstance(), "matchedLineCount"));
     515
     516        pxDriver.JITcompileMain();
     517        pxDriver.linkAndFinalize();
     518
     519    } else {
     520
     521        #ifdef CUDA_ENABLED
     522        if (codegen::NVPTX){
     523            ExternalFileBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1), addrSpace);
     524            MatchResults.setStreamSetBuffer(outputStream);
     525
     526            pxDriver.addKernelCall(icgrepK, {&BasisBits, &LineBreakStream}, {&MatchResults});
     527
     528            pxDriver.generatePipelineIR();
     529
     530            iBuilder->CreateRetVoid();
     531
     532            pxDriver.JITcompileMain();
     533            pxDriver.linkAndFinalize();
     534        }
     535        #endif
     536
     537        if (CPU_Only) {
     538
     539            CircularBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     540            MatchResults.allocateBuffer();
     541
     542            pxDriver.addKernelCall(icgrepK, {&BasisBits, &LineBreakStream}, {&MatchResults});
     543
     544            kernel::ScanMatchKernel scanMatchK(iBuilder, grepType, encodingBits);
     545            scanMatchK.setInitialArguments({fileIdx});
     546
     547            pxDriver.addKernelCall(scanMatchK, {&MatchResults, &LineBreakStream, byteStream}, {});
     548
     549            switch (grepType) {
     550                case GrepType::Normal:
     551                    if (UTF_16) {
     552                        pxDriver.addExternalLink(scanMatchK, "matcher", &wrapped_report_match<uint16_t>);
     553                    } else {
     554                        pxDriver.addExternalLink(scanMatchK, "matcher", &wrapped_report_match<uint8_t>);
     555                    }
     556                    break;
     557                case GrepType::NameExpression:
     558                    pxDriver.addExternalLink(scanMatchK, "matcher", &insert_codepoints);
     559                    break;
     560                case GrepType::PropertyValue:
     561                    pxDriver.addExternalLink(scanMatchK, "matcher", &insert_property_values);
     562                    break;
     563            }
     564
     565            pxDriver.generatePipelineIR();
     566
     567            iBuilder->CreateRetVoid();
     568
     569            pxDriver.JITcompileMain();
     570            pxDriver.linkAndFinalize();
     571        }
     572    }
     573
     574    #ifdef CUDA_ENABLED
     575    Function * mainCPUFn = nullptr;
     576    if(codegen::NVPTX){
     577        Function * kernelFunction = generateGPUKernel(M, iBuilder, CountOnly);
     578        MDNode * Node = MDNode::get(M->getContext(),
     579                                    {llvm::ValueAsMetadata::get(kernelFunction),
     580                                     MDString::get(M->getContext(), "kernel"),
     581                                     ConstantAsMetadata::get(ConstantInt::get(iBuilder->getInt32Ty(), 1))});
     582        NamedMDNode *NMD = M->getOrInsertNamedMetadata("nvvm.annotations");
     583        NMD->addOperand(Node);
     584
     585        Compile2PTX(M, IRFilename, PTXFilename);
     586        mainCPUFn = generateCPUKernel(cpuM, CPUBuilder, mGrepType);
     587        if (CountOnly) return;
     588    }
     589    #endif
     590
     591    delete iBuilder;
     592    delete sourceK;
     593    delete byteStream;
     594
     595    if (CountOnly) {
     596        mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(pxDriver.getPointerToMain());
     597    } else {
     598        #ifdef CUDA_ENABLED
     599        if(codegen::NVPTX){
     600            mGrepFunction_CPU = reinterpret_cast<GrepFunctionType_CPU>(pxDriver.getPointerToMain());
     601        }
     602        #endif
     603        if (CPU_Only) {
     604            mGrepFunction = reinterpret_cast<GrepFunctionType>(pxDriver.getPointerToMain());
     605        }
     606    }
     607}
     608
     609
     610void GrepEngine::multiGrepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16, GrepType grepType, const bool usingStdIn) {
     611
     612    isUTF_16 = UTF_16;
     613    Module * M = new Module(moduleName + ":icgrep", getGlobalContext());;
     614    IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);;
     615
     616    const unsigned segmentSize = codegen::SegmentSize;
     617    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
     618    const unsigned encodingBits = UTF_16 ? 16 : 8;
     619
     620    Type * const sizeTy = iBuilder->getSizeTy();
     621    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), 0);
     622    Type * const resultTy = CountOnly ? sizeTy : iBuilder->getVoidTy();
     623
     624    Function * mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, sizeTy, sizeTy, nullptr));
     625    mainFn->setCallingConv(CallingConv::C);
     626    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     627    Function::arg_iterator args = mainFn->arg_begin();
     628
     629    Value * inputStream = &*(args++);
     630    inputStream->setName("input");
     631    Value * fileSize = &*(args++);
     632    fileSize->setName("fileSize");
     633    Value * fileIdx = &*(args++);
     634    fileIdx->setName("fileIdx");
     635
     636    StreamSetBuffer * byteStream = nullptr;
     637    kernel::KernelBuilder * sourceK = nullptr;
     638    if (usingStdIn) {
     639        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize);
     640        cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
     641        sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
     642    } else {
     643        byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
     644        cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
     645        sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
     646        sourceK->setInitialArguments({fileSize});
     647    }
     648    sourceK->generateKernel({}, {byteStream});
     649
     650    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
     651    BasisBits.allocateBuffer();
     652
     653    kernel::S2PKernel  s2pk(iBuilder);
     654    s2pk.generateKernel({byteStream}, {&BasisBits});
     655
     656    std::vector<pablo::PabloKernel *> icgrepKs;
     657    std::vector<StreamSetBuffer *> MatchResultsBufs;
     658
     659    for(unsigned i=0; i<REs.size(); i++){
     660        pablo::PabloKernel * icgrepK = new pablo::PabloKernel(iBuilder, "icgrep"+std::to_string(i), {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
     661        re::re2pablo_compiler(icgrepK, re::regular_expression_passes(REs[i]), false);
     662        pablo_function_passes(icgrepK);
     663        icgrepKs.push_back(icgrepK);
     664        CircularBuffer * MatchResults = new CircularBuffer(iBuilder, iBuilder->getStreamSetTy(2, 1), segmentSize * bufferSegments);
     665        MatchResults->allocateBuffer();
     666        MatchResultsBufs.push_back(MatchResults);
     667    }
     668
     669    std::vector<kernel::KernelBuilder *> KernelList;
     670    KernelList.push_back(sourceK);
     671    KernelList.push_back(&s2pk);
     672
     673    CircularBuffer mergedResults(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     674    mergedResults.allocateBuffer();
     675
     676    kernel::StreamsMerge streamsMergeK(iBuilder, 1, REs.size());
     677    streamsMergeK.generateKernel(MatchResultsBufs, {&mergedResults});
     678
     679    kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
     680    CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     681    LineBreakStream.allocateBuffer();
     682    linebreakK.generateKernel({&BasisBits}, {&LineBreakStream});
     683
     684    KernelList.push_back(&linebreakK);
     685    for(unsigned i=0; i<REs.size(); i++){
     686        icgrepKs[i]->generateKernel({&BasisBits, &LineBreakStream}, {MatchResultsBufs[i]});
     687        KernelList.push_back(icgrepKs[i]);
     688    }
     689    KernelList.push_back(&streamsMergeK);
     690
     691    if (CountOnly) {
     692        kernel::MatchCount matchCountK(iBuilder);
     693        matchCountK.generateKernel({&mergedResults}, {});
     694
     695        KernelList.push_back(&matchCountK);
     696
     697        generatePipeline(iBuilder, KernelList);
     698        iBuilder->CreateRet(matchCountK.getScalarField(matchCountK.getInstance(), "matchedLineCount"));
     699
     700    } else {
     701        kernel::ScanMatchKernel scanMatchK(iBuilder, grepType, encodingBits);
     702        scanMatchK.generateKernel({byteStream, &mergedResults, &LineBreakStream}, {});
     703        scanMatchK.setInitialArguments({fileIdx});
     704
     705        KernelList.push_back(&scanMatchK);
     706
     707        generatePipeline(iBuilder, KernelList);
     708
     709        iBuilder->CreateRetVoid();
     710    }
     711
     712    mEngine = JIT_to_ExecutionEngine(M);
     713    ApplyObjectCache(mEngine);
     714    icgrep_Linking(M, mEngine);
     715
     716    mEngine->finalizeObject();
     717    delete iBuilder;
     718    delete sourceK;
     719    delete byteStream;
     720
     721    if (CountOnly) {
     722        mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
     723    } else {
     724        mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
     725    }
     726
     727}
     728
     729re::CC * GrepEngine::grepCodepoints() {
     730    parsedCodePointSet = re::makeCC();
     731    char * mFileBuffer = getUnicodeNameDataPtr();
     732    size_t mFileSize = getUnicodeNameDataSize();
     733    mGrepFunction(mFileBuffer, mFileSize, 0);
     734    return parsedCodePointSet;
     735}
     736
     737const std::vector<std::string> & GrepEngine::grepPropertyValues(const std::string& propertyName) {
     738    enum { MaxSupportedVectorWidthInBytes = 32 };
     739    AlignedAllocator<char, MaxSupportedVectorWidthInBytes> alloc;
     740    parsedPropertyValues.clear();
     741    const std::string & str = UCD::getPropertyValueGrepString(propertyName);
     742    const auto n = str.length();
     743    // NOTE: MaxSupportedVectorWidthInBytes of trailing 0s are needed to prevent the grep function from
     744    // erroneously matching garbage data when loading the final partial block.
     745    char * aligned = alloc.allocate(n + MaxSupportedVectorWidthInBytes, 0);
     746    std::memcpy(aligned, str.data(), n);
     747    std::memset(aligned + n, 0, MaxSupportedVectorWidthInBytes);
     748    mGrepFunction(aligned, n, 0);
     749    alloc.deallocate(aligned, 0);
     750    return parsedPropertyValues;
    739751}
    740752
     
    766778, mGrepFunction_CountOnly(nullptr)
    767779, mGrepFunction_CPU(nullptr)
    768 , mGrepType(GrepType::Normal)
    769780, mEngine(nullptr) {
    770781
Note: See TracChangeset for help on using the changeset viewer.