Changeset 5585


Ignore:
Timestamp:
Jul 31, 2017, 12:41:52 PM (3 months ago)
Author:
xuedongx
Message:

Use multiplexed character classes as the input to the grep kernel, and restructure the icGrep pipeline: Matches = RE_compiler<regexp>(CharacterClasses, LineBreaks)

Location:
icGREP/icgrep-devel/icgrep
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5574 r5585  
    1111#include <UCD/UnicodeNameData.h>
    1212#include <UCD/resolve_properties.h>
     13#include <kernels/charclasses.h>
    1314#include <kernels/cc_kernel.h>
    1415#include <kernels/grep_kernel.h>
     
    2526#include <re/re_toolchain.h>
    2627#include <toolchain/toolchain.h>
     28#include <re/re_name_resolve.h>   
     29#include <re/re_collect_unicodesets.h>
     30#include <re/re_multiplex.h>
    2731#include <toolchain/cpudriver.h>
    2832#include <toolchain/NVPTXDriver.h>
     
    322326    const auto n = REs.size();
    323327   
     328    std::vector<std::vector<UCD::UnicodeSet>> charclasses;
     329
     330    for (unsigned i = 0; i < n; i++) {
     331        std::vector<UCD::UnicodeSet> UnicodeSets;
     332        REs[i] = resolveNames(REs[i]);
     333        re::collect_UnicodeSets(REs[i], UnicodeSets);
     334        std::vector<std::vector<unsigned>> exclusiveSetIDs;
     335        std::vector<UCD::UnicodeSet> multiplexedCCs;
     336
     337        doMultiplexCCs(UnicodeSets, exclusiveSetIDs, multiplexedCCs);
     338
     339        REs[i] = multiplex(REs[i], UnicodeSets, exclusiveSetIDs, multiplexedCCs);
     340        charclasses.push_back(multiplexedCCs);
     341    }
     342
    324343    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
    325    
     344
    326345    for(unsigned i = 0; i < n; ++i){
     346        StreamSetBuffer * CharClasses = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(charclasses[i].size()), segmentSize * bufferSegments));
     347        kernel::Kernel * ccK = grepDriver->addKernelInstance(make_unique<kernel::CharClassesKernel>(idb, charclasses[i]));
     348        grepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
    327349        StreamSetBuffer * MatchResults = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    328         kernel::Kernel * icgrepK = grepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
    329         grepDriver->makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
     350        kernel::Kernel * icgrepK = grepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i], true, charclasses[i].size()));
     351        grepDriver->makeKernelCall(icgrepK, {CharClasses, LineBreakStream, RequiredStreams}, {MatchResults});
    330352        MatchResultsBufs[i] = MatchResults;
    331353    }
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5561 r5585  
    177177
    178178
    179 
    180 ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, RE * const re)
     179ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, RE * const re, bool cc, unsigned cc_size)
    181180: RegularExpressionOptimizer(re)
    182181, PabloKernel(iBuilder,
    183182              "ic" + sha1sum(mSignature),
    184               {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}, Binding{iBuilder->getStreamSetTy(4, 1), "required"}},
     183              {Binding{iBuilder->getStreamSetTy(cc ? cc_size : 8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}, Binding{iBuilder->getStreamSetTy(4, 1), "required"}},
    185184              {Binding{iBuilder->getStreamSetTy(1, 1), "matches"}}) {
    186185
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h

    r5561 r5585  
    4242class ICGrepKernel : public RegularExpressionOptimizer, public pablo::PabloKernel {
    4343public:
    44     ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::RE * const re_ast);
     44    ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::RE * const re_ast, bool cc = false, unsigned cc_size = 0);
    4545    std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    4646    bool isCachable() const override { return true; }
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5565 r5585  
    4242namespace re {
    4343
    44 
    4544RE * RE_Compiler::resolveUnicodeProperties(RE * re) {
    4645    Name * ZeroWidth = nullptr;
    4746    mCompiledName = &mBaseMap;
    4847
    49     re = resolveNames(re);
    5048    auto nameMap = gatherNames(re, ZeroWidth);
    51     if (LLVM_LIKELY(nameMap.size() > 0)) {
    52         UCD::UCDCompiler ucdCompiler(mCCCompiler);
    53         if (LLVM_UNLIKELY(AlgorithmOptionIsSet(DisableIfHierarchy))) {
    54             ucdCompiler.generateWithoutIfHierarchy(nameMap, mPB);
    55         } else {
    56             ucdCompiler.generateWithDefaultIfHierarchy(nameMap, mPB);
    57         }
    58         for (auto t : nameMap) {
    59             if (t.second) {
    60                 mCompiledName->add(t.first, makeMarker(MarkerPosition::FinalMatchUnit, mPB.createAnd(t.second, mAny)));
    61             }
    62         }
    63     }
    6449
    6550    // Now precompile any grapheme segmentation rules
     
    130115        nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    131116    }
    132     return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(markerVar(marker), pb.createAnd(mCCCompiler.compileCC(cc), mFinal)));
     117    return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(markerVar(marker), pb.createAnd(mCCCompiler.compileCC(cc), mAny)));
    133118}
    134119
Note: See TracChangeset for help on using the changeset viewer.