Ignore:
Timestamp:
Mar 13, 2018, 1:17:42 PM (14 months ago)
Author:
cameron
Message:

Byte-Bit grep kernel optimizes the case where an RE begins with a trigraph

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r5902 r5908  
    6262static cl::opt<bool> PropertyKernels("enable-property-kernels", cl::desc("Enable Unicode property kernels."), cl::init(false));
    6363
     64const unsigned DefaultByteCClimit = 6;
     65
     66static cl::opt<unsigned> ByteCClimit("byte-CC-limit", cl::desc("Max number of CCs for byte CC pipeline."), cl::init(DefaultByteCClimit));
     67
    6468
    6569namespace grep {
     
    196200    return (packSize * packSize) / b->getBitBlockWidth();
    197201}
    198 
     202   
    199203std::pair<StreamSetBuffer *, StreamSetBuffer *> GrepEngine::grepPipeline(std::vector<re::RE *> & REs, StreamSetBuffer * ByteStream) {
    200204    auto & idb = mGrepDriver->getBuilder();
     
    242246    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    243247   
     248    re::RE * prefixRE;
     249    re::RE * suffixRE;
    244250    // For simple regular expressions with a small number of characters, we
    245251    // can bypass transposition and use the Direct CC compiler.
    246     if ((nREs == 1) && (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB) && byteTestsWithinLimit(REs[0], 6)) {
     252    bool isSimple = (nREs == 1) && (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
     253    if (isSimple) {
     254        REs[0] = toUTF8(REs[0]);
     255    }
     256    if (isSimple && byteTestsWithinLimit(REs[0], ByteCClimit)) {
    247257        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    248258        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, REs[0]);
    249259        mGrepDriver->makeKernelCall(icgrepK, {ByteStream}, {MatchResults});
    250260        MatchResultsBufs[0] = MatchResults;
    251         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{breakCC}, 1);
     261        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{breakCC}, 1);
     262        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
     263    } else if (isSimple && hasTriCCwithinLimit(REs[0], ByteCClimit, prefixRE, suffixRE)) {
     264        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     265        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteBitGrepKernel>(idb, prefixRE, suffixRE);
     266        mGrepDriver->makeKernelCall(icgrepK, {ByteStream}, {MatchResults});
     267        MatchResultsBufs[0] = MatchResults;
     268        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{breakCC}, 1);
    252269        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
    253270    } else {
Note: See TracChangeset for help on using the changeset viewer.