Changeset 4420


Ignore:
Timestamp:
Jan 15, 2015, 8:03:21 AM (4 years ago)
Author:
cameron
Message:

Improvements for bounded repetition

Location:
icGREP/icgrep-devel/icgrep/re
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4416 r4420  
    333333        return processUnboundedRep(rep->getRE(), marker, pb);
    334334    }
     335    else if (ub == lb) { // if (rep->getUB() != Rep::UNBOUNDED_REP)
     336        return marker;
     337    }
    335338    else { // if (rep->getUB() != Rep::UNBOUNDED_REP)
    336339        return processBoundedRep(rep->getRE(), ub - lb, marker, pb);
     
    339342
    340343/*
    341    Given a stream |repeated| marking positions immediately after matches to an item
    342    of length |repeated_lgth|, compute a stream marking positions immediately after
    343    |repeat_count| consecutive occurrences of such items.
     344   Given a stream |repeated| marking positions associated with matches to an item
     345   of length |repeated_lgth|, compute a stream marking |repeat_count| consecutive
     346   occurrences of such items.
    344347*/
    345348
    346 inline Assign * RE_Compiler::consecutive(Assign * repeated, int repeated_lgth, int repeat_count, pablo::PabloBlock & pb) {
     349inline Assign * RE_Compiler::consecutive1(Assign * repeated, int repeated_lgth, int repeat_count, pablo::PabloBlock & pb) {
    347350        int i = repeated_lgth;
    348351        int total_lgth = repeat_count * repeated_lgth;
    349352        Assign * consecutive_i = repeated;
    350         while (i * 2 < total_lgth) {
     353        while (i * 2 <= total_lgth) {
    351354            PabloAST * v = consecutive_i;
    352             consecutive_i = pb.createAssign("consecutive", pb.createAnd(v, pb.createAdvance(v, i)));
     355            PabloAST * v2 =  pb.createAdvance(v, i);
    353356            i *= 2;
     357            consecutive_i = pb.createAssign("at" + std::to_string(i) + "inarow" , pb.createAnd(v,v2));
    354358        }       
    355359        if (i < total_lgth) {
    356360            PabloAST * v = consecutive_i;
    357             consecutive_i = pb.createAssign("consecutive", pb.createAnd(v, pb.createAdvance(v, total_lgth - i)));
     361            consecutive_i = pb.createAssign("at" + std::to_string(total_lgth) + "inarow" , pb.createAnd(v, pb.createAdvance(v, total_lgth - i)));
    358362        }
    359363        return consecutive_i;
     364}
     365
     366inline Assign * RE_Compiler::reachable(Assign * repeated, int repeated_lgth, int repeat_count, pablo::PabloBlock & pb) {
     367        int i = repeated_lgth;
     368        int total_lgth = repeat_count * repeated_lgth;
     369        if (repeat_count == 0) {
     370            return repeated;
     371        }
     372        Assign * reachable_i = pb.createAssign("within1", pb.createOr(repeated, pb.createAdvance(repeated, 1)));
     373        while (i * 2 < total_lgth) {
     374            PabloAST * v = reachable_i;
     375            PabloAST * v2 =  pb.createAdvance(v, i);
     376            i *= 2;
     377            reachable_i = pb.createAssign("within" + std::to_string(i), pb.createOr(v, v2));
     378        }       
     379        if (i < total_lgth) {
     380            PabloAST * v = reachable_i;
     381            reachable_i = pb.createAssign("within" + std::to_string(total_lgth), pb.createOr(v, pb.createAdvance(v, total_lgth - i)));
     382        }
     383        return reachable_i;
    360384}
    361385
    362386MarkerType RE_Compiler::processLowerBound(RE * repeated, int lb, MarkerType marker, PabloBlock & pb) {
    363387    if (isByteLength(repeated)) {
    364         PabloAST * cc = markerVar(compile(repeated, pb));
    365         Assign * cc_lb = consecutive(pb.createAssign("repeated", pb.createAdvance(cc,1)), 1, lb, pb);
    366         PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), markerPos(marker) == FinalMatchByte ? lb + 1 : lb);
    367         return makeMarker(InitialPostPositionByte, pb.createAssign("lowerbound", pb.createAnd(marker_fwd, cc_lb)));
     388        Assign * cc = markerVar(compile(repeated, pb));
     389        Assign * cc_lb = consecutive1(cc, 1, lb, pb);
     390        PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), markerPos(marker) == FinalMatchByte ? lb : lb-1);
     391        return makeMarker(FinalMatchByte, pb.createAssign("lowerbound", pb.createAnd(marker_fwd, cc_lb)));
    368392    }
    369393    // Fall through to general case.
     
    375399
    376400MarkerType RE_Compiler::processBoundedRep(RE * repeated, int ub, MarkerType marker, PabloBlock & pb) {
    377     if (isByteLength(repeated)) {
     401    if (isByteLength(repeated) && ub > 1) {
    378402        // log2 upper bound for fixed length (=1) class
    379         // Mask out any positions that are more than ub positions from a current match.
     403        // Create a mask of positions reachable within ub from current marker.
    380404        // Use matchstar, then apply filter.
    381         Assign * nonMatch = pb.createAssign("nonmatch", pb.createNot(markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb))));
    382         PabloAST * upperLimitMask = pb.createNot(consecutive(nonMatch, 1, ub + 1, pb));
     405        Assign * match = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));
     406        Assign * upperLimitMask = reachable(match, 1, ub, pb);
    383407        PabloAST * cursor = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));
    384408        PabloAST * rep_class_var = markerVar(compile(repeated, pb));
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r4411 r4420  
    8181    MarkerType process(Diff * diff, MarkerType marker, pablo::PabloBlock & cg);
    8282    MarkerType process(Intersect * x, MarkerType marker, pablo::PabloBlock & cg);
    83     pablo::Assign * consecutive(pablo::Assign * repeated,  int repeated_lgth, int repeat_count, pablo::PabloBlock & pb);
     83    pablo::Assign * consecutive1(pablo::Assign * repeated,  int repeated_lgth, int repeat_count, pablo::PabloBlock & pb);
     84    pablo::Assign * reachable(pablo::Assign * repeated,  int repeated_lgth, int repeat_count, pablo::PabloBlock & pb);
    8485    static bool isFixedLength(RE * regexp);
    8586    MarkerType processLowerBound(RE * repeated,  int lb, MarkerType marker, pablo::PabloBlock & pb);
Note: See TracChangeset for help on using the changeset viewer.