Changeset 4846


Ignore:
Timestamp:
Oct 18, 2015, 2:14:59 PM (2 years ago)
Author:
nmedfort
Message:

Bug fix for bounded expressions + grapheme cluster mode.

Location:
icGREP/icgrep-devel/icgrep/re
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4844 r4846  
    7979    PabloAST * u8pfx4 = mCCCompiler.compileCC(makeCC(0xF0, 0xF4), it);
    8080    Assign * u8suffix = it.createAssign("u8suffix", mCCCompiler.compileCC(makeCC(0x80, 0xBF)));
    81    
     81
    8282    //
    8383    // Two-byte sequences
     
    8686    Assign * NEL = it2.createAssign("NEL", it2.createAnd(it2.createAdvance(mCCCompiler.compileCC(makeCC(0xC2), it2), 1), mCCCompiler.compileCC(makeCC(0x85), it2)));
    8787    it.createIf(u8pfx2, {u8scope22, NEL}, it2);
    88    
     88
    8989    //
    9090    // Three-byte sequences
     
    9999    Assign * EX_invalid = it3.createAssign("EX_invalid", it3.createOr(E0_invalid, ED_invalid));
    100100    it.createIf(u8pfx3, {u8scope32, u8scope3X, LS_PS, EX_invalid}, it3);
    101  
     101
    102102    //
    103103    // Four-byte sequences
     
    127127    //
    128128    //
    129    
     129
    130130    Assign * valid_pfx = it.createAssign("valid_pfx", it.createAnd(u8pfx, u8valid));
    131131    mNonFinal = it.createAssign("nonfinal", it.createAnd(it.createOr(it.createOr(u8pfx, u8scope32), u8scope4nonfinal), u8valid));
    132    
     132
    133133    Assign * NEL_LS_PS = it.createAssign("NEL_LS_PS", it.createOr(NEL, LS_PS));
    134134    mPB.createIf(u8pfx, {u8invalid, valid_pfx, mNonFinal, NEL_LS_PS}, it);
    135    
     135
    136136    PabloAST * LB_chars = mPB.createOr(LF_VT_FF_CR, NEL_LS_PS);
    137137    PabloAST * u8single = mPB.createAnd(mCCCompiler.compileCC(makeCC(0x00, 0x7F)), mPB.createNot(u8invalid));
     
    263263    std::unordered_set<Name *> visited;
    264264
    265     std::function<void(RE*)> gather = [&](RE * re) {       
     265    std::function<void(RE*)> gather = [&](RE * re) {
    266266        if (Name * name = dyn_cast<Name>(re)) {
    267267            if (visited.insert(name).second) {
     
    559559/*
    560560   Given a stream |repeated| marking positions associated with matches to an item
    561    of length |repeated_lgth|, compute a stream marking |repeat_count| consecutive 
     561   of length |repeated_lgth|, compute a stream marking |repeat_count| consecutive
    562562   occurrences of such items.
    563563*/
     
    601601
    602602MarkerType RE_Compiler::processLowerBound(RE * repeated, int lb, MarkerType marker, PabloBuilder & pb) {
    603     if (isByteLength(repeated) && !DisableLog2BoundedRepetition) {
     603    if (!mGraphemeBoundaryRule && isByteLength(repeated) && !DisableLog2BoundedRepetition) {
    604604        PabloAST * cc = markerVar(compile(repeated, pb));
    605605        PabloAST * cc_lb = consecutive_matches(cc, 1, lb, pb);
     
    608608    }
    609609    // Fall through to general case.
    610     while (lb-- != 0) {
     610    for (int i = 1; i <= lb; ++i) {
    611611        marker = process(repeated, marker, pb);
     612        if (mGraphemeBoundaryRule) {
     613            marker = AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb);
     614        }
    612615    }
    613616    return marker;
     
    615618
    616619MarkerType RE_Compiler::processBoundedRep(RE * repeated, int ub, MarkerType marker, PabloBuilder & pb) {
    617     if (isByteLength(repeated) && ub > 1 && !DisableLog2BoundedRepetition) {
     620    if (!mGraphemeBoundaryRule && isByteLength(repeated) && ub > 1 && !DisableLog2BoundedRepetition) {
    618621        // log2 upper bound for fixed length (=1) class
    619622        // Create a mask of positions reachable within ub from current marker.
     
    626629    }
    627630    // Fall through to general case.
    628     while (ub-- != 0) {
     631    for (int i = 1; i <= ub; ++i) {
    629632        MarkerType a = process(repeated, marker, pb);
    630633        MarkerType m = marker;
    631634        AlignMarkers(a, m, pb);
    632         marker = makeMarker(markerPos(a), pb.createOr(markerVar(a), markerVar(m), "m"));
     635        marker = makeMarker(markerPos(a), pb.createOr(markerVar(a), markerVar(m), "upper" + std::to_string(i)));
     636        if (mGraphemeBoundaryRule) {
     637            marker = AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb);
     638        }
    633639    }
    634640    return marker;
     
    637643MarkerType RE_Compiler::processUnboundedRep(RE * repeated, MarkerType marker, PabloBuilder & pb) {
    638644    // always use PostPosition markers for unbounded repetition.
    639     PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));   
    640     if (isByteLength(repeated)  && !DisableMatchStar) {
     645    PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));
     646    if (!mGraphemeBoundaryRule && isByteLength(repeated)  && !DisableMatchStar) {
    641647        PabloAST * cc = markerVar(compile(repeated, pb));
    642648        PabloAST * mstar = nullptr;
     
    666672        return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(mstar, final, "unbounded"));
    667673    } else if (mStarDepth > 0){
    668         PabloBuilder * outerb = pb.getParent();       
     674        PabloBuilder * outerb = pb.getParent();
    669675        Assign * starPending = outerb->createAssign("pending", outerb->createZeroes());
    670         Assign * starAccum = outerb->createAssign("accum", outerb->createZeroes());       
     676        Assign * starAccum = outerb->createAssign("accum", outerb->createZeroes());
    671677        mStarDepth++;
    672678        PabloAST * m1 = pb.createOr(base, starPending);
     
    702708        mLoopVariants.clear();
    703709        return makeMarker(markerPos(result), pb.createAssign("unbounded", nextWhileAccum));
    704     }   
     710    }
    705711}
    706712
  • icGREP/icgrep-devel/icgrep/re/re_simplifier.cpp

    r4841 r4846  
    1212#include <re/re_assertion.h>
    1313#include <re/re_grapheme_boundary.hpp>
     14#include <re/re_analysis.h>
    1415#include <algorithm>
    1516#include <memory>
     
    2021RE * RE_Simplifier::simplify(RE * re) {
    2122    if (Alt * alt = dyn_cast<Alt>(re)) {
    22         std::vector<RE*> list;
     23        std::vector<RE *> list;
    2324        list.reserve(alt->size());
    2425        for (RE * re : *alt) {
     
    2728        re = makeAlt(list.begin(), list.end());
    2829    } else if (Seq * seq = dyn_cast<Seq>(re)) {
    29         std::vector<RE*> list;
     30        std::vector<RE *> list;
    3031        list.reserve(seq->size());
    3132        for (RE * re : *seq) {
     
    3637        re = makeAssertion(simplify(a->getAsserted()), a->getKind(), a->getSense());
    3738    } else if (Rep * rep = dyn_cast<Rep>(re)) {
    38         re = makeRep(simplify(rep->getRE()), rep->getLB(), rep->getUB());
     39        RE * expr = simplify(rep->getRE());
     40        if (GraphemeBoundary * gp = dyn_cast<GraphemeBoundary>(expr)) {
     41            if (gp->getExpression() && isUnicodeUnitLength(gp->getExpression())) {
     42                rep->setRE(gp->getExpression());
     43                gp->setExpression(rep);
     44                return gp;
     45            }
     46        }
     47        re = makeRep(expr, rep->getLB(), rep->getUB());
    3948    } else if (Diff * diff = dyn_cast<Diff>(re)) {
    4049        re = makeDiff(simplify(diff->getLH()), diff->getRH());
    4150    } else if (Intersect * e = dyn_cast<Intersect>(re)) {
    4251        re = makeIntersect(simplify(e->getLH()), e->getRH());
     52    } else if (GraphemeBoundary * gp = dyn_cast<GraphemeBoundary>(re)) {
     53        if (gp->getExpression() && isa<GraphemeBoundary>(gp->getExpression())) {
     54            re = gp->getExpression();
     55        }
    4356    }
    4457    return re;
Note: See TracChangeset for help on using the changeset viewer.