Ignore:
Timestamp:
Dec 16, 2017, 12:51:48 PM (15 months ago)
Author:
cameron
Message:

Decouple Unicode property support from re_compiler; initial support for (?-m) flag

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5782 r5786  
    4242
    4343namespace re {
    44 
    45 RE * RE_Compiler::resolveUnicodeProperties(RE * re) {
    46     Name * ZeroWidth = nullptr;
    47     mCompiledName = &mBaseMap;
    48     gatherNames(re, ZeroWidth);
    49     // Now precompile any grapheme segmentation rules
    50     if (ZeroWidth) {
    51         mCompiledName->add(ZeroWidth, compileName(ZeroWidth, mPB));
    52     }
    53     return re;
    54 }
    55 
    56 RE * RE_Compiler::compileUnicodeNames(RE * re) {
    57     return resolveUnicodeProperties(re);
    58 }
    5944
    6045PabloAST * RE_Compiler::compile(RE * re) {
     
    341326    //
    342327    // A bounded repetition with an upper bound of at least 2.
    343     if (!mGraphemeBoundaryRule && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)) {
     328    if (!AlgorithmOptionIsSet(DisableLog2BoundedRepetition)) {
    344329        // Check for a regular expression that satisfies one of the special conditions that
    345330        // allow implementation using the log2 technique.
     
    390375    for (auto i = 0; i < group; i++) {
    391376        marker = process(repeated, marker, pb);
    392         if (mGraphemeBoundaryRule) {
    393             marker = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    394         }
    395377    }
    396378    if (lb == group) {
     
    412394    //
    413395    // A bounded repetition with an upper bound of at least 2.
    414     if (!mGraphemeBoundaryRule && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition) && (ub > 1)) {
     396    if (!AlgorithmOptionIsSet(DisableLog2BoundedRepetition) && (ub > 1)) {
    415397        // Check for a regular expression that satisfies one of the special conditions that
    416398        // allow implementation using the log2 technique.
     
    466448        AlignMarkers(a, m, pb);
    467449        marker = makeMarker(markerPos(a), pb.createOr(markerVar(a), markerVar(m)));
    468         if (mGraphemeBoundaryRule) {
    469             marker = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    470         }
    471450    }
    472451    if (ub == group) {
     
    487466    // always use PostPosition markers for unbounded repetition.
    488467    PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
    489     if (!mGraphemeBoundaryRule && isByteLength(repeated)  && !AlgorithmOptionIsSet(DisableMatchStar)) {
     468    if (isByteLength(repeated)  && !AlgorithmOptionIsSet(DisableMatchStar)) {
    490469        PabloAST * mask = markerVar(compile(repeated, pb));
    491470        PabloAST * nonFinal = mNonFinal;
    492         if (mGraphemeBoundaryRule) {
    493             nonFinal = pb.createOr(nonFinal, pb.createNot(mGraphemeBoundaryRule, "gext"));
    494         }
    495471        // The post position character may land on the initial byte of a multi-byte character. Combine them with the masked range.
    496472        PabloAST * unbounded = pb.createMatchStar(base, pb.createOr(mask, nonFinal), "unbounded");
     
    500476        PabloAST * mstar = nullptr;
    501477        PabloAST * nonFinal = mNonFinal;
    502         if (mGraphemeBoundaryRule) {
    503             nonFinal = pb.createOr(nonFinal, pb.createNot(mGraphemeBoundaryRule, "gext"));
    504         }
    505478        cc = pb.createOr(cc, nonFinal);
    506479        mstar = pb.createMatchStar(base, cc);
    507480        PabloAST * final = mFinal;
    508         if (mGraphemeBoundaryRule) {
    509             final = mGraphemeBoundaryRule;
    510         }
    511481        return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(mstar, final, "unbounded"));
    512482    } else if (mStarDepth > 0){
     
    566536            marker.stream = pb.createAdvance(marker.stream, 1, "ipp");
    567537            PabloAST * nonFinal = mNonFinal;
    568             if (mGraphemeBoundaryRule) {
    569                 nonFinal = pb.createOr(nonFinal, pb.createNot(mGraphemeBoundaryRule, "gext"));
    570             }
    571538            PabloAST * starts = pb.createAnd(mInitial, marker.stream);
    572539            marker.stream = pb.createScanThru(starts, nonFinal, "fpp");
     
    594561, mLineBreak(nullptr)
    595562, mCRLF(nullptr)
    596 , mGraphemeBoundaryRule(nullptr)
    597563, mInitial(nullptr)
    598564, mNonFinal(nullptr)
Note: See TracChangeset for help on using the changeset viewer.