Ignore:
Timestamp:
Oct 11, 2015, 1:45:52 PM (3 years ago)
Author:
nmedfort
Message:

Back-up check in

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4820 r4829  
    1717#include <re/re_intersect.h>
    1818#include <re/re_assertion.h>
     19#include <re/re_grapheme_boundary.hpp>
    1920#include <re/re_analysis.h>
    2021#include <re/re_memoizer.hpp>
     
    3031#include <iostream>
    3132#include <pablo/printer_pablos.h>
    32 
    3333#include "llvm/Support/CommandLine.h"
    34 static cl::OptionCategory fREcompilationOptions("Regex Compilation Options",
    35                                       "These options control the compilation of regular expressions to Pablo.");
     34#include <sstream>
     35
     36static cl::OptionCategory fREcompilationOptions("Regex Compilation Options", "These options control the compilation of regular expressions to Pablo.");
    3637
    3738static cl::opt<bool> DisableLog2BoundedRepetition("disable-log2-bounded-repetition", cl::init(false),
     
    4647static cl::opt<bool> SetMod64Approximation("mod64-approximate", cl::init(false),
    4748                     cl::desc("set mod64 approximate mode"), cl::cat(fREcompilationOptions));
     49
    4850#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    4951static cl::opt<bool> UsePregeneratedUnicode("use-pregenerated-unicode", cl::init(false),
    5052                     cl::desc("use fixed pregenerated Unicode character class sets instead"), cl::cat(fREcompilationOptions));
    5153#endif
     54
     55#define UNICODE_LINE_BREAK (!DisableUnicodeLineBreak)
     56
    5257using namespace pablo;
    5358
    5459namespace re {
    55 
    56 RE_Compiler::RE_Compiler(pablo::PabloFunction & function, cc::CC_Compiler & ccCompiler)
    57 : mCCCompiler(ccCompiler)
    58 , mLineFeed(nullptr)
    59 , mCRLF(nullptr)
    60 , mUnicodeLineBreak(nullptr)
    61 , mNonLineBreak(nullptr)
    62 , mInitial(nullptr)
    63 , mNonFinal(nullptr)
    64 , mFinal(nullptr)
    65 , mWhileTest(nullptr)
    66 , mStarDepth(0)
    67 , mLoopVariants()
    68 , mPB(*ccCompiler.getBuilder().getPabloBlock(), ccCompiler.getBuilder())
    69 , mFunction(function)
    70 {
    71 
    72 }
    73    
    74 MarkerType RE_Compiler::AdvanceMarker(MarkerType m, MarkerPosition newpos, PabloBuilder & pb) {
    75     if (m.pos == newpos) return m;
    76     PabloAST * a = m.stream;
    77     if (m.pos == MarkerPosition::FinalMatchByte) {
    78         // Must advance at least to InitialPostPositionByte
    79         a = pb.createAdvance(a, 1, "adv");
    80     }
    81     // Now at InitialPostPositionByte; is a further advance needed?
    82     if (newpos == MarkerPosition::FinalPostPositionByte) {
    83         // Must advance through nonfinal bytes
    84         a = pb.createScanThru(pb.createAnd(mInitial, a), mNonFinal, "scanToFinal");
    85     }
    86     return {newpos, a};
    87 }
    88 
    89 void RE_Compiler::AlignMarkers(MarkerType & m1, MarkerType & m2, PabloBuilder & pb) {
    90     if (m1.pos < m2.pos) {
    91         m1 = AdvanceMarker(m1, m2.pos, pb);
    92     }
    93     else if (m2.pos < m1.pos) {
    94         m2 = AdvanceMarker(m2, m1.pos, pb);
    95     }
    96 }
    97 
    98 #define UNICODE_LINE_BREAK (!DisableUnicodeLineBreak)
    9960
    10061void RE_Compiler::initializeRequiredStreams() {
     
    178139    mUnicodeLineBreak = mPB.createAnd(LB_chars, mPB.createNot(mCRLF));  // count the CR, but not CRLF
    179140    PabloAST * const lb = UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed;
    180     mNonLineBreak = mPB.createNot(lb);
     141    mAny = mPB.createNot(lb, "any");
    181142    mFunction.setResult(1, mPB.createAssign("lf", mPB.createAnd(lb, mPB.createNot(mCRLF))));
    182143}
     
    195156
    196157    Memoizer memoizer;
     158    Name * gcbRule = nullptr;
    197159
    198160    std::function<RE*(RE*)> resolve = [&](RE * re) -> RE * {
     
    211173                            const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(functionName);
    212174                            Call * call = mPB.createCall(Prototype::Create(functionName, std::get<1>(ep), std::get<2>(ep), std::get<0>(ep)), mCCCompiler.getBasisBits());
    213                             name->setCompiled(mPB.createAnd(call, mNonLineBreak));
     175                            name->setCompiled(mPB.createAnd(call, mAny));
    214176                        } else {
    215177                        #endif
     
    231193        } else if (Alt * alt = dyn_cast<Alt>(re)) {
    232194            CC * unionCC = nullptr;
     195            std::stringstream name;
    233196            for (auto ai = alt->begin(); ai != alt->end(); ) {
    234197                RE * re = resolve(*ai);
    235198                if (CC * cc = getDefinitionIfCC(re)) {
    236                     unionCC = (unionCC == nullptr) ? cc : makeCC(unionCC, cc);
     199                    if (unionCC == nullptr) {
     200                        unionCC = cc;
     201                    } else {
     202                        unionCC = makeCC(unionCC, cc);
     203                        name << '+';
     204                    }
     205                    Name * n = cast<Name>(re);
     206                    if (n->hasNamespace()) {
     207                        name << n->getNamespace() << ':';
     208                    }
     209                    name << n->getName();
    237210                    ai = alt->erase(ai);
    238211                } else {
     
    241214            }
    242215            if (unionCC) {
    243                 alt->push_back(makeName("union", unionCC));
     216                alt->push_back(makeName(name.str(), unionCC));
    244217            }
    245218            if (alt->size() == 1) {
     
    266239                return resolve(makeName("intersect", intersectCC(lh, rh)));
    267240            }
     241        } else if (GraphemeBoundary * gb = dyn_cast<GraphemeBoundary>(re)) {
     242            if (LLVM_LIKELY(gb->getGraphemeBoundaryRule() == nullptr)) {
     243                switch (gb->getType()) {
     244                    case GraphemeBoundary::Type::ClusterBoundary:
     245                        if (gcbRule == nullptr) {
     246                            gcbRule = cast<Name>(resolve(generateGraphemeClusterBoundaryRule()));
     247                        }
     248                        gb->setBoundaryRule(gcbRule);
     249                        break;
     250                    default:
     251                        throw std::runtime_error("Only grapheme cluster boundary rules are supported in icGrep 1.0");
     252                }
     253            }
     254            gb->setExpression(resolve(gb->getExpression()));
    268255        }
    269256        return re;
     
    296283            gather(diff->getLH());
    297284            gather(diff->getRH());
     285        } else if (Intersect * ix = dyn_cast<Intersect>(re)) {
     286            gather(ix->getLH());
     287            gather(ix->getRH());
     288        } else if (GraphemeBoundary * gb = dyn_cast<GraphemeBoundary>(re)) {
     289            gather(gb->getExpression());
     290            gather(gb->getGraphemeBoundaryRule());
    298291        }
    299292    };
     
    302295    gather(re);
    303296
    304     if (nameMap.size() > 0) {
     297    if (LLVM_LIKELY(nameMap.size() > 0)) {
    305298        UCD::UCDCompiler ucdCompiler(mCCCompiler);
    306299        ucdCompiler.generateWithDefaultIfHierarchy(nameMap, mPB);
    307300        for (auto t : nameMap) {
    308301            if (t.second) {
    309                 t.first->setCompiled(mPB.createAnd(t.second, mNonLineBreak));
    310             }
    311         }
    312     }
    313 
     302                t.first->setCompiled(mPB.createAnd(t.second, mAny));
     303            }
     304        }
     305    }
     306
     307    // Now precompile any grapheme segmentation rules
     308    if (gcbRule) {
     309        compileName(gcbRule, mPB);
     310    }
    314311    return re;
    315312}
     
    319316}
    320317
     318Name * RE_Compiler::generateGraphemeClusterBoundaryRule() {
     319    // 3.1.1 Grapheme Cluster Boundary Rules
     320    #define Behind(x) makeLookBehindAssertion(x)
     321    #define Ahead(x) makeLookAheadAssertion(x)
     322
     323    RE * GCB_Control = makeName("gcb", "cn", Name::Type::UnicodeProperty);
     324
     325    RE * GCB_1 = makeStart();
     326    RE * GCB_2 = makeEnd();
     327    RE * GCB_4 = Behind(GCB_Control);
     328    RE * GCB_5 = Ahead(GCB_Control);
     329
     330    RE * GCB_1_5 = makeAlt({GCB_1, GCB_2, makeSeq({GCB_4, GCB_5})});
     331
     332    RE * GCB_L = makeName("gcb", "l", Name::Type::UnicodeProperty);
     333    RE * GCB_V = makeName("gcb", "v", Name::Type::UnicodeProperty);
     334    RE * GCB_LV = makeName("gcb", "lv", Name::Type::UnicodeProperty);
     335    RE * GCB_LVT = makeName("gcb", "lvt", Name::Type::UnicodeProperty);
     336    RE * GCB_T = makeName("gcb", "t", Name::Type::UnicodeProperty);
     337    RE * GCB_RI = makeName("gcb", "ri", Name::Type::UnicodeProperty);
     338    // Legacy rules
     339    RE * GCB_6 = makeSeq({Behind(GCB_L), Ahead(makeAlt({GCB_L, GCB_V, GCB_LV, GCB_LVT}))});
     340    RE * GCB_7 = makeSeq({Behind(makeAlt({GCB_LV, GCB_V})), Ahead(makeAlt({GCB_V, GCB_T}))});
     341    RE * GCB_8 = makeSeq({Behind(makeAlt({GCB_LVT, GCB_T})), Ahead(GCB_T)});
     342    RE * GCB_8a = makeSeq({Behind(GCB_RI), Ahead(GCB_RI)});
     343    RE * GCB_9 = Ahead(makeName("gcb", "ex", Name::Type::UnicodeProperty));
     344    // Extended rules
     345    RE * GCB_9a = Ahead(makeName("gcb", "sm", Name::Type::UnicodeProperty));
     346    RE * GCB_9b = Behind(makeName("gcb", "pp", Name::Type::UnicodeProperty));
     347
     348    RE * GCB_6_9b = makeAlt({GCB_6, GCB_7, GCB_8, GCB_8a, GCB_9, GCB_9a, GCB_9b});
     349    Name * gcb = makeName("gcb", Name::Type::UnicodeProperty);
     350    gcb->setDefinition(makeDiff(GCB_6_9b,  GCB_1_5));
     351
     352    return gcb;
     353}
     354
    321355void RE_Compiler::finalizeMatchResult(MarkerType match_result) {
    322     mFunction.setResult(0, mPB.createAssign("matches", mPB.createAnd(mPB.createMatchStar(markerVar(match_result), mNonLineBreak), UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed)));
     356    mFunction.setResult(0, mPB.createAssign("matches", mPB.createAnd(mPB.createMatchStar(markerVar(match_result), mAny), UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed)));
    323357}
    324358
     
    327361}
    328362
    329 PabloAST * RE_Compiler::nextUnicodePosition(MarkerType m, PabloBuilder & pb) {
    330     if (markerPos(m) == MarkerPosition::FinalPostPositionByte) {
    331         return markerVar(m);
    332     } else if (markerPos(m) == MarkerPosition::InitialPostPositionByte) {
    333         return pb.createScanThru(pb.createAnd(mInitial, markerVar(m)), mNonFinal);
    334     } else {
    335         return pb.createScanThru(pb.createAnd(mInitial, pb.createAdvance(markerVar(m), 1)), mNonFinal);
    336     }
    337 }
    338 
    339363MarkerType RE_Compiler::process(RE * re, MarkerType marker, PabloBuilder & pb) {
    340364    if (Name * name = dyn_cast<Name>(re)) {
    341         return process(name, marker, pb);
    342     }
    343     else if (Seq* seq = dyn_cast<Seq>(re)) {
     365        return compileName(name, marker, pb);
     366    } else if (Seq* seq = dyn_cast<Seq>(re)) {
    344367        return process(seq, marker, pb);
    345     }
    346     else if (Alt * alt = dyn_cast<Alt>(re)) {
     368    } else if (Alt * alt = dyn_cast<Alt>(re)) {
    347369        return process(alt, marker, pb);
    348     }
    349     else if (Rep * rep = dyn_cast<Rep>(re)) {
     370    } else if (Rep * rep = dyn_cast<Rep>(re)) {
    350371        return process(rep, marker, pb);
    351     }
    352     else if (Assertion * a = dyn_cast<Assertion>(re)) {
     372    } else if (Assertion * a = dyn_cast<Assertion>(re)) {
    353373        return process(a, marker, pb);
    354     }
    355     else if (isa<Any>(re)) {
    356         PabloAST * nextPos = nextUnicodePosition(marker, pb);
    357         PabloAST * dot = pb.createNot(UNICODE_LINE_BREAK ? pb.createOr(mUnicodeLineBreak, mCRLF) : mLineFeed);
    358         return makeMarker(MarkerPosition::FinalMatchByte, pb.createAnd(nextPos, dot, "dot"));
    359     }
    360     else if (Diff * diff = dyn_cast<Diff>(re)) {
     374    } else if (isa<Any>(re)) {
     375        return compileAny(marker, pb);
     376    } else if (Diff * diff = dyn_cast<Diff>(re)) {
    361377        return process(diff, marker, pb);
    362     }
    363     else if (Intersect * ix = dyn_cast<Intersect>(re)) {
     378    } else if (Intersect * ix = dyn_cast<Intersect>(re)) {
    364379        return process(ix, marker, pb);
    365     }
    366     else if (isa<Start>(re)) {
     380    } else if (isa<Start>(re)) {
    367381        MarkerType m = AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb);
    368382        if (UNICODE_LINE_BREAK) {
     
    370384            PabloAST * sol = pb.createNot(pb.createOr(pb.createAdvance(pb.createNot(line_end), 1), mCRLF));
    371385            return makeMarker(MarkerPosition::InitialPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
    372         }
    373         else {
     386        } else {
    374387            PabloAST * sol = pb.createNot(pb.createAdvance(pb.createNot(mLineFeed), 1));
    375388            return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
    376389        }
    377     }
    378     else if (isa<End>(re)) {
     390    } else if (isa<End>(re)) {
    379391        if (UNICODE_LINE_BREAK) {
    380392            PabloAST * nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb));
     
    383395        PabloAST * nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));  // For LF match
    384396        return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(nextPos, mLineFeed, "eol"));
     397    } else if (GraphemeBoundary * gb = dyn_cast<GraphemeBoundary>(re)) {
     398        const auto inGraphemeBoundaryRule = mGraphemeBoundaryRule;
     399        mGraphemeBoundaryRule = gb->getGraphemeBoundaryRule();
     400        assert (mGraphemeBoundaryRule);
     401        marker = process(gb->getExpression(), marker, pb);
     402        mGraphemeBoundaryRule = inGraphemeBoundaryRule;
    385403    }
    386404    return marker;
    387405}
    388406
    389 MarkerType RE_Compiler::process(Name * name, MarkerType marker, PabloBuilder & pb) {
     407inline MarkerType RE_Compiler::compileAny(const MarkerType m, PabloBuilder & pb) {
     408    PabloAST * nextFinalByte = markerVar(AdvanceMarker(m, MarkerPosition::FinalPostPositionByte, pb));
     409    PabloAST * lb = mLineFeed;
     410    if (UNICODE_LINE_BREAK) {
     411        lb = pb.createOr(mUnicodeLineBreak, mCRLF);
     412    }
     413    PabloAST * dot = pb.createAnd(nextFinalByte, pb.createNot(lb), "dot");
     414    MarkerPosition pos = MarkerPosition::FinalMatchByte;
     415    if (mGraphemeBoundaryRule) {
     416        dot = pb.createScanThru(dot, pb.createOr(dot, mGraphemeBoundaryRule->getCompiled()), "dot_gext");
     417        pos = MarkerPosition::InitialPostPositionByte;
     418    }
     419    return makeMarker(pos, dot);
     420}
     421
     422inline MarkerType RE_Compiler::compileName(Name * name, MarkerType marker, PabloBuilder & pb) {
    390423    MarkerType nextPos;
    391424    if (markerPos(marker) == MarkerPosition::FinalPostPositionByte) {
    392425        nextPos = marker;
    393     }
    394     else if (name->getType() == Name::Type::Byte) {
     426    } else if (name->getType() == Name::Type::Byte) {
    395427        nextPos = AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb);
    396     }
    397     else {
     428    } else {
    398429        nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb);
    399430    }
    400     return makeMarker(MarkerPosition::FinalMatchByte, pb.createAnd(markerVar(nextPos), getNamedCharacterClassStream(name, pb), "m"));
    401 }
    402 
    403 PabloAST * RE_Compiler::getNamedCharacterClassStream(Name * name, PabloBuilder & pb) {
     431    PabloAST * namePos = pb.createAnd(markerVar(nextPos), compileName(name, pb), name->getName());
     432    MarkerPosition pos = MarkerPosition::FinalMatchByte;
     433    if (mGraphemeBoundaryRule) {
     434        namePos = pb.createScanThru(namePos, pb.createOr(namePos, pb.createOr(mNonFinal, mGraphemeBoundaryRule->getCompiled())), name->getName() + "_gext");
     435        pos = MarkerPosition::FinalPostPositionByte;
     436    }
     437    return makeMarker(pos, namePos);
     438}
     439
     440inline PabloAST * RE_Compiler::compileName(Name * name, PabloBuilder & pb) {
    404441    PabloAST * var = name->getCompiled();
    405442    if (LLVM_LIKELY(var != nullptr)) {
     
    408445        MarkerType m = compile(name->getDefinition(), pb);
    409446        assert(markerPos(m) == MarkerPosition::FinalMatchByte);
    410         var = pb.createAnd(markerVar(m), mNonLineBreak);
     447        var = pb.createAnd(markerVar(m), mAny);
    411448        name->setCompiled(var);
    412449        return var;
    413     } else {
    414         throw std::runtime_error("Unresolved name " + name->getName());
    415     }
     450    }
     451    throw std::runtime_error("Unresolved name " + name->getName());
    416452}
    417453
     
    423459        }
    424460        return marker;
    425     }
    426     else {
     461    } else {
    427462        return processSeqTail(seq->begin(), seq->end(), 0, marker, pb);
    428463    }
     
    436471        current++;
    437472        return processSeqTail(current, end, matchLenSoFar + minMatchLength(r), marker, pb);
    438     }
    439     else {
     473    } else {
    440474        PabloBuilder nested = PabloBuilder::Create(pb);
    441475        MarkerType m1 = processSeqTail(current, end, 0, marker, nested);
     
    479513        }
    480514        return makeMarker(markerPos(m), pb.createAnd(markerVar(marker), lb, "lookback"));
    481     }
    482     else if (isUnicodeUnitLength(asserted)) {
     515    } else if (isUnicodeUnitLength(asserted)) {
    483516        MarkerType lookahead = compile(asserted, pb);
    484517        assert(markerPos(lookahead) == MarkerPosition::FinalMatchByte);
     
    490523        return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(markerVar(fbyte), la, "lookahead"));
    491524    }
    492     else {
    493         throw std::runtime_error("Unsupported lookahead assertion.");
    494     }
     525    throw std::runtime_error("Unsupported lookahead assertion.");
     526}
     527
     528inline bool alignedUnicodeLength(const RE * lh, const RE * rh) {
     529    const auto lhl = getUnicodeUnitLengthRange(lh);
     530    const auto rhl = getUnicodeUnitLengthRange(rh);
     531    return (lhl.first == lhl.second && lhl.first == rhl.first && lhl.second == rhl.second);
    495532}
    496533
     
    498535    RE * lh = diff->getLH();
    499536    RE * rh = diff->getRH();
    500     if (isUnicodeUnitLength(lh) && isUnicodeUnitLength(rh)) {
     537    if (alignedUnicodeLength(lh, rh)) {
    501538        MarkerType t1 = process(lh, marker, pb);
    502539        MarkerType t2 = process(rh, marker, pb);
     
    510547    RE * lh = x->getLH();
    511548    RE * rh = x->getRH();
    512     if (isUnicodeUnitLength(lh) && isUnicodeUnitLength(rh)) {
     549    if (alignedUnicodeLength(lh, rh)) {
    513550        MarkerType t1 = process(lh, marker, pb);
    514551        MarkerType t2 = process(rh, marker, pb);
     
    619656   
    620657    if (isByteLength(repeated)  && !DisableMatchStar) {
    621         PabloAST * cc = markerVar(compile(repeated, pb)); 
    622         PabloAST * mstar = SetMod64Approximation ? pb.createMod64MatchStar(base, cc) : pb.createMatchStar(base, cc, "unbounded");
     658        PabloAST * cc = markerVar(compile(repeated, pb));
     659        PabloAST * mstar = nullptr;
     660        if (SetMod64Approximation) {
     661            mstar = pb.createMod64MatchStar(base, cc, "unbounded");
     662        } else {
     663            mstar = pb.createMatchStar(base, cc, "unbounded");
     664        }
    623665        return makeMarker(MarkerPosition::InitialPostPositionByte, mstar);
    624666    }
    625667    else if (isUnicodeUnitLength(repeated) && !DisableMatchStar && !DisableUnicodeMatchStar) {
    626668        PabloAST * cc = markerVar(compile(repeated, pb));
    627         PabloAST * mstar = SetMod64Approximation ? pb.createMod64MatchStar(base, pb.createOr(mNonFinal, cc)) : pb.createMatchStar(base, pb.createOr(mNonFinal, cc));
    628         return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(mstar, mFinal, "unbounded"));
    629     }
    630     else if (mStarDepth > 0){
    631        
     669        PabloAST * mstar = nullptr;
     670        PabloAST * nonFinal = mNonFinal;
     671        if (mGraphemeBoundaryRule) {
     672            nonFinal = pb.createOr(nonFinal, pb.createNot(mGraphemeBoundaryRule->getCompiled()));
     673        }
     674        cc = pb.createOr(cc, nonFinal);
     675        if (SetMod64Approximation) {
     676            mstar = pb.createMod64MatchStar(base, cc);
     677        } else {
     678            mstar = pb.createMatchStar(base, cc);
     679        }
     680        PabloAST * final = mFinal;
     681        if (mGraphemeBoundaryRule) {
     682            final = pb.createOr(final, mGraphemeBoundaryRule->getCompiled());
     683        }
     684        return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(mstar, final, "unbounded"));
     685    } else if (mStarDepth > 0){
     686
    632687        PabloBuilder * outerb = pb.getParent();
    633688       
     
    647702       
    648703        return makeMarker(MarkerPosition::InitialPostPositionByte, pb.createAssign("unbounded", pb.createOr(base, nextStarAccum)));
    649     }   
    650     else {
     704    } else {
    651705        Assign * whileTest = pb.createAssign("test", base);
    652706        Assign * whilePending = pb.createAssign("pending", base);
     
    669723        return makeMarker(MarkerPosition::InitialPostPositionByte, pb.createAssign("unbounded", nextWhileAccum));
    670724    }   
     725}
     726
     727inline MarkerType RE_Compiler::AdvanceMarker(const MarkerType m, const MarkerPosition newpos, PabloBuilder & pb) {
     728    if (m.pos == newpos) return m;
     729    PabloAST * a = m.stream;
     730    if (m.pos == MarkerPosition::FinalMatchByte) {
     731        // Must advance the previous marker to the InitialPostPositionByte
     732        a = pb.createAdvance(a, 1, "initial");
     733    }
     734    // Now at InitialPostPositionByte; is a further advance needed?
     735    if (newpos == MarkerPosition::FinalPostPositionByte) {
     736        // Must advance through nonfinal bytes
     737        a = pb.createScanThru(pb.createAnd(mInitial, a), mNonFinal, "final");
     738    }
     739    return {newpos, a};
     740}
     741
     742inline void RE_Compiler::AlignMarkers(MarkerType & m1, MarkerType & m2, PabloBuilder & pb) {
     743    if (m1.pos < m2.pos) {
     744        m1 = AdvanceMarker(m1, m2.pos, pb);
     745    } else if (m2.pos < m1.pos) {
     746        m2 = AdvanceMarker(m2, m1.pos, pb);
     747    }
     748}
     749
     750RE_Compiler::RE_Compiler(pablo::PabloFunction & function, cc::CC_Compiler & ccCompiler)
     751: mCCCompiler(ccCompiler)
     752, mLineFeed(nullptr)
     753, mCRLF(nullptr)
     754, mUnicodeLineBreak(nullptr)
     755, mAny(nullptr)
     756, mGraphemeBoundaryRule(nullptr)
     757, mInitial(nullptr)
     758, mNonFinal(nullptr)
     759, mFinal(nullptr)
     760, mWhileTest(nullptr)
     761, mStarDepth(0)
     762, mLoopVariants()
     763, mPB(*ccCompiler.getBuilder().getPabloBlock(), ccCompiler.getBuilder())
     764, mFunction(function)
     765{
     766
     767}
     768
    671769} // end of namespace re
    672 }
Note: See TracChangeset for help on using the changeset viewer.