Changeset 5338


Ignore:
Timestamp:
Feb 22, 2017, 3:22:04 PM (2 years ago)
Author:
lindanl
Message:

Provide an option to build a separate kernel for each regular expression.

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5336 r5338  
    7676target_link_libraries (RegExpCompiler RegExpADT)
    7777
    78 add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/unicode_linebreak_kernel.cpp)
     78add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/unicode_linebreak_kernel.cpp kernels/streams_merge.cpp)
    7979add_executable(u8u16 u8u16.cpp toolchain.cpp)
    8080add_executable(base64 base64.cpp kernels/radix64.cpp toolchain.cpp)
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5336 r5338  
    1818#include <kernels/cc_kernel.h>
    1919#include <kernels/unicode_linebreak_kernel.h>
     20#include <kernels/streams_merge.h>
    2021#include <kernels/pipeline.h>
    2122#include <kernels/mmap_kernel.h>
     
    237238
    238239    return mainCPUFn;
     240}
     241
     // Generates and JIT-compiles the "Main" grep function using one Pablo kernel
     // per regular expression in REs. Each kernel writes to its own match-result
     // buffer; a kernel::StreamsMerge kernel combines those buffers into a single
     // stream that kernel::ScanMatchKernel consumes together with the line-break
     // stream. The compiled entry point is stored in mGrepFunction.
     //
     //   moduleName - the LLVM module is named moduleName + ":icgrep".
     //   REs        - regular expressions; each is run through
     //                re::regular_expression_passes and re::re2pablo_compiler.
     //   CountOnly  - selects Main's declared return type (size_t vs. void) and is
     //                forwarded to re::re2pablo_compiler.
     //   UTF_16     - saved in isUTF_16; selects 16 vs. 8 encoding bits.
     //   grepType   - saved in mGrepType, which is passed to ScanMatchKernel.
     242void GrepEngine::multiGrepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16, GrepType grepType) {
     243
     244    isUTF_16 = UTF_16;
     245    Module * M = new Module(moduleName + ":icgrep", getGlobalContext());; 
     246    IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);;
     247
     248    const unsigned segmentSize = codegen::SegmentSize;
            // With segment-pipeline parallelism the segment count is scaled by the thread count.
     249    const unsigned bufferSegments = segmentPipelineParallel ? (codegen::BufferSegments * codegen::ThreadNum) : codegen::BufferSegments;
     250    const unsigned encodingBits = UTF_16 ? 16 : 8;
     251
     252    mGrepType = grepType;
     253
     254    Type * const size_ty = iBuilder->getSizeTy();
     255    Type * const int8PtrTy = iBuilder->getInt8PtrTy();
     256    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), 0);
     257    Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
     258
            // Declare Main(input, fileSize, fileIdx) and start emitting into its entry block.
     259    Function * mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
     260    mainFn->setCallingConv(CallingConv::C);
     261    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     262    Function::arg_iterator args = mainFn->arg_begin();
     263   
     264    Value * inputStream = &*(args++);
     265    inputStream->setName("input");
     266    Value * fileSize = &*(args++);
     267    fileSize->setName("fileSize");
     268    Value * fileIdx = &*(args++);
     269    fileIdx->setName("fileIdx");
     270
            // ByteStream wraps the caller-supplied input; BasisBits is a circular buffer
            // sized segmentSize * bufferSegments.
            // NOTE(review): both stream-set types hard-code 8 while inputType uses
            // encodingBits -- confirm this is intended when UTF_16 is true.
     271    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));   
     272    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
     273    ByteStream.setStreamSetBuffer(inputStream, fileSize);
     274    BasisBits.allocateBuffer();
     275   
     276    kernel::MMapSourceKernel mmapK(iBuilder, segmentSize);
     277    mmapK.generateKernel({}, {&ByteStream});
     278    mmapK.setInitialArguments({fileSize});
     279
     280    kernel::S2PKernel  s2pk(iBuilder);
     281    s2pk.generateKernel({&ByteStream}, {&BasisBits});
     282
            // Character class containing only line feed (0x0A), used by the "linefeed" kernel.
     283    std::vector<re::CC *> LF;
     284    LF.push_back(re::makeCC(0x0A));
     285   
     286    kernel::UnicodeLineBreakKernelBuilder unicodelbK(iBuilder, "unicodelinebreak", encodingBits);
     287    kernel::ParabixCharacterClassKernelBuilder linefeedK(iBuilder, "linefeed", LF, encodingBits);
     288
            // Both kernels are constructed; UNICODE_LINE_BREAK picks which one is generated and run.
     289    pablo::PabloKernel *linebreakK = UNICODE_LINE_BREAK ? &cast<pablo::PabloKernel>(unicodelbK) :  &cast<pablo::PabloKernel>(linefeedK);
     290    CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     291    LineBreakStream.allocateBuffer();
     292    linebreakK->generateKernel({&BasisBits}, {&LineBreakStream});
     293
     294    std::vector<pablo::PabloKernel *> icgrepKs;
     295    std::vector<StreamSetBuffer *> MatchResultsBufs;
     296
            // Build one kernel ("icgrep<i>") and one match-result buffer per regular expression.
            // NOTE(review): the kernels and buffers are allocated with new and no matching
            // delete is visible in this function -- confirm who owns them.
     297    for(unsigned i=0; i<REs.size(); i++){   
     298        pablo::PabloKernel * icgrepK = new pablo::PabloKernel(iBuilder, "icgrep"+std::to_string(i), {Binding{iBuilder->getStreamSetTy(8), "basis"}});
     299        re::re2pablo_compiler(icgrepK, re::regular_expression_passes(REs[i]), CountOnly);
     300        pablo_function_passes(icgrepK);
     301        icgrepKs.push_back(icgrepK);
     302        CircularBuffer * MatchResults = new CircularBuffer(iBuilder, iBuilder->getStreamSetTy(2, 1), segmentSize * bufferSegments);       
     303        MatchResults->allocateBuffer();
     304        MatchResultsBufs.push_back(MatchResults);
     305    }   
     306
            // Pipeline order: mmap source, s2p, line break, per-RE kernels, merge, scan-match.
     307    std::vector<KernelBuilder *> KernelList;
     308    KernelList.push_back(&mmapK);
     309    KernelList.push_back(&s2pk);   
     310    KernelList.push_back(linebreakK);
     311
     312    CircularBuffer mergedResults(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     313    mergedResults.allocateBuffer();
     314
            // Merge kernel takes all per-RE result buffers as inputs and writes mergedResults.
     315    kernel::StreamsMerge streamsMergeK(iBuilder, 1, REs.size());
     316    streamsMergeK.generateKernel(MatchResultsBufs, {&mergedResults});
     317
     318    kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType);
     319    scanMatchK.generateKernel({&mergedResults, &LineBreakStream}, {});               
     320    scanMatchK.setInitialArguments({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
     321
            // Generate each per-RE kernel (BasisBits in, its own result buffer out) and
            // append it to the pipeline ahead of the merge and scan-match kernels.
     322    for(unsigned i=0; i<REs.size(); i++){
     323        icgrepKs[i]->generateKernel({&BasisBits}, {MatchResultsBufs[i]});
     324        KernelList.push_back(icgrepKs[i]);
     325    }
     326    KernelList.push_back(&streamsMergeK);
     327    KernelList.push_back(&scanMatchK);
     328   
            // Select the pipeline driver from the parallelism flags; default is the plain loop.
     329    if (pipelineParallel){
     330        generatePipelineParallel(iBuilder, KernelList);
     331    } else if (segmentPipelineParallel){
     332        generateSegmentParallelPipeline(iBuilder, KernelList);
     333    }  else{
     334        generatePipelineLoop(iBuilder, KernelList);
     335    }
     336   
            // NOTE(review): Main is declared with resultTy, which is size_ty when CountOnly
            // is true, but a void return is emitted unconditionally here -- confirm that
            // the CountOnly case is handled (a value-returning ret looks to be required).
     337    iBuilder->CreateRetVoid();
     338   
     339    mEngine = JIT_to_ExecutionEngine(M);
     340    ApplyObjectCache(mEngine);
     341    icgrep_Linking(M, mEngine);
     342
            // Module verification runs only in builds without NDEBUG.
     343#ifndef NDEBUG
     344    verifyModule(*M, &dbgs());
     345#endif
     346
     347    mEngine->finalizeObject();
     348    delete iBuilder;
     349   
            // Publish the compiled Main as the engine's grep entry point.
     350    mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
     351
    239352}
    240353
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5267 r5338  
    2424 
    2525    void grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal);
    26    
     26    void multiGrepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal);
     27     
    2728    void doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly, bool UTF_16);
    2829   
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5320 r5338  
    8686         Gives you colored output + back-referencing capability."), cl::cat(EnhancedGrepOptions));
    8787
     88static cl::opt<bool> MultiGrepKernels("enable-multiGrep-kernels", cl::desc("Construct separated kernels for each regular expression"), cl::cat(EnhancedGrepOptions));
    8889
    8990static std::vector<std::string> allFiles;
     
    112113static std::string allREs;
    113114static re::ModeFlagSet globalFlags = 0;
     115std::vector<re::RE *> REs;
    114116
    115117re::RE * get_icgrep_RE() {
     
    136138    if (CaseInsensitive) globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
    137139
    138  
    139     std::vector<re::RE *> REs;
     140
    140141    re::RE * re_ast = nullptr;
    141142    for (unsigned i = 0; i < regexVector.size(); i++) {
     
    371372   
    372373    GrepEngine grepEngine;
    373     grepEngine.grepCodeGen(module_name, re_ast, CountOnly, UTF_16);
     374    if(MultiGrepKernels){
     375        grepEngine.multiGrepCodeGen(module_name, REs, CountOnly, UTF_16);
     376    }
     377    else{
     378        grepEngine.grepCodeGen(module_name, re_ast, CountOnly, UTF_16);
     379    }
    374380
    375381    allFiles = getFullFileList(inputFiles);
Note: See TracChangeset for help on using the changeset viewer.