Ignore:
Timestamp:
Oct 18, 2015, 2:14:59 PM (3 years ago)
Author:
nmedfort
Message:

Bug fix for bounded expressions + grapheme cluster mode.

File:
1 edited

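Legend:

Unmodified (row shows both the old r4844 and the new r4846 line number run together, e.g. "612615" = old line 612, new line 615)
Added (row shows only the new r4846 line number)
Removed (row shows only the old r4844 line number)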
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4844 r4846  
    7979    PabloAST * u8pfx4 = mCCCompiler.compileCC(makeCC(0xF0, 0xF4), it);
    8080    Assign * u8suffix = it.createAssign("u8suffix", mCCCompiler.compileCC(makeCC(0x80, 0xBF)));
    81    
     81
    8282    //
    8383    // Two-byte sequences
     
    8686    Assign * NEL = it2.createAssign("NEL", it2.createAnd(it2.createAdvance(mCCCompiler.compileCC(makeCC(0xC2), it2), 1), mCCCompiler.compileCC(makeCC(0x85), it2)));
    8787    it.createIf(u8pfx2, {u8scope22, NEL}, it2);
    88    
     88
    8989    //
    9090    // Three-byte sequences
     
    9999    Assign * EX_invalid = it3.createAssign("EX_invalid", it3.createOr(E0_invalid, ED_invalid));
    100100    it.createIf(u8pfx3, {u8scope32, u8scope3X, LS_PS, EX_invalid}, it3);
    101  
     101
    102102    //
    103103    // Four-byte sequences
     
    127127    //
    128128    //
    129    
     129
    130130    Assign * valid_pfx = it.createAssign("valid_pfx", it.createAnd(u8pfx, u8valid));
    131131    mNonFinal = it.createAssign("nonfinal", it.createAnd(it.createOr(it.createOr(u8pfx, u8scope32), u8scope4nonfinal), u8valid));
    132    
     132
    133133    Assign * NEL_LS_PS = it.createAssign("NEL_LS_PS", it.createOr(NEL, LS_PS));
    134134    mPB.createIf(u8pfx, {u8invalid, valid_pfx, mNonFinal, NEL_LS_PS}, it);
    135    
     135
    136136    PabloAST * LB_chars = mPB.createOr(LF_VT_FF_CR, NEL_LS_PS);
    137137    PabloAST * u8single = mPB.createAnd(mCCCompiler.compileCC(makeCC(0x00, 0x7F)), mPB.createNot(u8invalid));
     
    263263    std::unordered_set<Name *> visited;
    264264
    265     std::function<void(RE*)> gather = [&](RE * re) {       
     265    std::function<void(RE*)> gather = [&](RE * re) {
    266266        if (Name * name = dyn_cast<Name>(re)) {
    267267            if (visited.insert(name).second) {
     
    559559/*
    560560   Given a stream |repeated| marking positions associated with matches to an item
    561    of length |repeated_lgth|, compute a stream marking |repeat_count| consecutive 
     561   of length |repeated_lgth|, compute a stream marking |repeat_count| consecutive
    562562   occurrences of such items.
    563563*/
     
    601601
    602602MarkerType RE_Compiler::processLowerBound(RE * repeated, int lb, MarkerType marker, PabloBuilder & pb) {
    603     if (isByteLength(repeated) && !DisableLog2BoundedRepetition) {
     603    if (!mGraphemeBoundaryRule && isByteLength(repeated) && !DisableLog2BoundedRepetition) {
    604604        PabloAST * cc = markerVar(compile(repeated, pb));
    605605        PabloAST * cc_lb = consecutive_matches(cc, 1, lb, pb);
     
    608608    }
    609609    // Fall through to general case.
    610     while (lb-- != 0) {
     610    for (int i = 1; i <= lb; ++i) {
    611611        marker = process(repeated, marker, pb);
     612        if (mGraphemeBoundaryRule) {
     613            marker = AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb);
     614        }
    612615    }
    613616    return marker;
     
    615618
    616619MarkerType RE_Compiler::processBoundedRep(RE * repeated, int ub, MarkerType marker, PabloBuilder & pb) {
    617     if (isByteLength(repeated) && ub > 1 && !DisableLog2BoundedRepetition) {
     620    if (!mGraphemeBoundaryRule && isByteLength(repeated) && ub > 1 && !DisableLog2BoundedRepetition) {
    618621        // log2 upper bound for fixed length (=1) class
    619622        // Create a mask of positions reachable within ub from current marker.
     
    626629    }
    627630    // Fall through to general case.
    628     while (ub-- != 0) {
     631    for (int i = 1; i <= ub; ++i) {
    629632        MarkerType a = process(repeated, marker, pb);
    630633        MarkerType m = marker;
    631634        AlignMarkers(a, m, pb);
    632         marker = makeMarker(markerPos(a), pb.createOr(markerVar(a), markerVar(m), "m"));
     635        marker = makeMarker(markerPos(a), pb.createOr(markerVar(a), markerVar(m), "upper" + std::to_string(i)));
     636        if (mGraphemeBoundaryRule) {
     637            marker = AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb);
     638        }
    633639    }
    634640    return marker;
     
    637643MarkerType RE_Compiler::processUnboundedRep(RE * repeated, MarkerType marker, PabloBuilder & pb) {
    638644    // always use PostPosition markers for unbounded repetition.
    639     PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));   
    640     if (isByteLength(repeated)  && !DisableMatchStar) {
     645    PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));
     646    if (!mGraphemeBoundaryRule && isByteLength(repeated)  && !DisableMatchStar) {
    641647        PabloAST * cc = markerVar(compile(repeated, pb));
    642648        PabloAST * mstar = nullptr;
     
    666672        return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(mstar, final, "unbounded"));
    667673    } else if (mStarDepth > 0){
    668         PabloBuilder * outerb = pb.getParent();       
     674        PabloBuilder * outerb = pb.getParent();
    669675        Assign * starPending = outerb->createAssign("pending", outerb->createZeroes());
    670         Assign * starAccum = outerb->createAssign("accum", outerb->createZeroes());       
     676        Assign * starAccum = outerb->createAssign("accum", outerb->createZeroes());
    671677        mStarDepth++;
    672678        PabloAST * m1 = pb.createOr(base, starPending);
     
    702708        mLoopVariants.clear();
    703709        return makeMarker(markerPos(result), pb.createAssign("unbounded", nextWhileAccum));
    704     }   
     710    }
    705711}
    706712
Note: See TracChangeset for help on using the changeset viewer.