Changeset 5610


Ignore:
Timestamp:
Aug 12, 2017, 2:52:01 PM (9 days ago)
Author:
nmedfort
Message:

Bug fix for bounded/unbounded repetitions.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5593 r5610  
    2929#include "cc/cc_compiler.h"         // for CC_Compiler
    3030#include "pablo/builder.hpp"        // for PabloBuilder
     31
    3132namespace pablo { class PabloAST; }
    3233namespace pablo { class PabloKernel; }
     
    113114        nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    114115    }
    115     return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(markerVar(marker), pb.createAnd(mCCCompiler.compileCC(cc), mAny)));
     116    return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(markerVar(marker), pb.createAnd(mCCCompiler.compileCC(cc, pb), mAny)));
    116117}
    117118
     
    200201        return makeMarker(MarkerPosition::FinalMatchUnit, accum[MarkerPosition::FinalMatchUnit]);
    201202    }
    202     PabloAST * combine = pb.createAdvance(accum[MarkerPosition::FinalMatchUnit], 1);
     203    PabloAST * combine = pb.createAdvance(accum[MarkerPosition::FinalMatchUnit], 1, "combine");
    203204    combine = pb.createOr(pb.createScanThru(pb.createAnd(mInitial, combine), mNonFinal), accum[MarkerPosition::FinalPostPositionUnit], "alt");
    204205    return makeMarker(MarkerPosition::FinalPostPositionUnit, combine);
     
    303304    if (LLVM_LIKELY(i < total)) {
    304305        PabloAST * v = consecutive_i;
    305         consecutive_i = pb.createAnd(v, pb.createAdvance(v, total - i), "at" + std::to_string(total));
     306        PabloAST * v2 = pb.createAdvance(v, total - i);
     307        consecutive_i = pb.createAnd(v, v2, "at" + std::to_string(total));
    306308    }
    307309    return consecutive_i;
     
    314316        return repeated;
    315317    }
    316     PabloAST * reachable_i = pb.createOr(repeated, pb.createAdvance(repeated, 1), "within1");
     318    PabloAST * reachable = pb.createOr(repeated, pb.createAdvance(repeated, 1), "within1");
    317319    while ((i * 2) < total_lgth) {
    318         PabloAST * v = reachable_i;
     320        PabloAST * v = reachable;
    319321        PabloAST * v2 =  pb.createAdvance(v, i);
    320322        i *= 2;
    321         reachable_i = pb.createOr(v, v2, "within" + std::to_string(i));
     323        reachable = pb.createOr(v, v2, "within" + std::to_string(i));
    322324    }
    323325    if (LLVM_LIKELY(i < total_lgth)) {
    324         PabloAST * v = reachable_i;
    325         reachable_i = pb.createOr(v, pb.createAdvance(v, total_lgth - i), "within" + std::to_string(total_lgth));
    326     }
    327     return reachable_i;
     326        PabloAST * v = reachable;
     327        PabloAST * v2 = pb.createAdvance(v, total_lgth - i);
     328        reachable = pb.createOr(v, v2, "within" + std::to_string(total_lgth));
     329    }
     330    return reachable;
    328331}
    329332
     
    334337        PabloAST * cc = markerVar(compile(repeated, pb));
    335338        PabloAST * cc_lb = consecutive_matches(cc, 1, lb, pb);
    336         PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), markerPos(marker) == MarkerPosition::FinalMatchUnit ? lb : lb - 1);
     339        const auto pos = markerPos(marker) == MarkerPosition::FinalMatchUnit ? lb : lb - 1;
     340        PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), pos);
    337341        return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
    338342    }
     
    360364   
    361365MarkerType RE_Compiler::processBoundedRep(RE * repeated, int ub, MarkerType marker, int ifGroupSize,  PabloBuilder & pb) {
    362     if (ub == 0) {
     366    if (LLVM_UNLIKELY(ub == 0)) {
    363367        return marker;
    364368    } else if (!mGraphemeBoundaryRule && isByteLength(repeated) && ub > 1 && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)) {
     
    366370        // Create a mask of positions reachable within ub from current marker.
    367371        // Use matchstar, then apply filter.
    368         PabloAST * match = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
    369         PabloAST * upperLimitMask = reachable(match, 1, ub, pb);
    370372        PabloAST * cursor = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
    371         PabloAST * rep_class_var = markerVar(compile(repeated, pb));
    372         return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(pb.createMatchStar(cursor, rep_class_var), upperLimitMask, "bounded"));
    373     }
    374     // Fall through to general case.  Process the first item and insert the rest into an if-structure.   
     373        // If we've advanced the cursor to the post position unit, cursor begins on the first masked bit of the bounded mask.
     374        // Extend the mask by ub - 1 byte positions to ensure the mask ends on the FinalMatchUnit of the repeated region.
     375        PabloAST * upperLimitMask = reachable(cursor, 1, ub - 1, pb);
     376        PabloAST * masked = pb.createAnd(markerVar(compile(repeated, pb)), upperLimitMask);
     377        PabloAST * nonFinal = mNonFinal;
     378        if (mGraphemeBoundaryRule) {
     379            nonFinal = pb.createOr(nonFinal, pb.createNot(mGraphemeBoundaryRule, "gext"));
     380        }
     381        // MatchStar deposits any cursors on the post position. However those cursors may may land on the initial "byte" of a
     382        // "multi-byte" character. Combine the masked range with any nonFinals.
     383        PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, nonFinal), "bounded");
     384        return makeMarker(MarkerPosition::FinalPostPositionUnit, bounded);
     385    }
     386    // Fall through to general case.  Process the first item and insert the rest into an if-structure.
    375387    auto group = ifGroupSize < ub ? ifGroupSize : ub;
    376388    for (auto i = 0; i < group; i++) {
     
    401413    PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
    402414    if (!mGraphemeBoundaryRule && isByteLength(repeated)  && !AlgorithmOptionIsSet(DisableMatchStar)) {
    403         PabloAST * cc = markerVar(compile(repeated, pb));
    404         PabloAST * mstar = nullptr;
    405         mstar = pb.createMatchStar(base, cc, "unbounded");
    406         return makeMarker(MarkerPosition::FinalPostPositionUnit, mstar);
     415        PabloAST * mask = markerVar(compile(repeated, pb));
     416        PabloAST * nonFinal = mNonFinal;
     417        if (mGraphemeBoundaryRule) {
     418            nonFinal = pb.createOr(nonFinal, pb.createNot(mGraphemeBoundaryRule, "gext"));
     419        }
     420        // The post position character may land on the initial byte of a multi-byte character. Combine them with the masked range.
     421        PabloAST * unbounded = pb.createMatchStar(base, pb.createOr(mask, nonFinal), "unbounded");
     422        return makeMarker(MarkerPosition::FinalPostPositionUnit, unbounded);
    407423    } else if (isUnicodeUnitLength(repeated) && !AlgorithmOptionIsSet(DisableMatchStar) && !AlgorithmOptionIsSet(DisableUnicodeMatchStar)) {
    408424        PabloAST * cc = markerVar(compile(repeated, pb));
     
    482498                nonFinal = pb.createOr(nonFinal, pb.createNot(mGraphemeBoundaryRule, "gext"));
    483499            }
    484             marker.stream = pb.createScanThru(pb.createAnd(mInitial, marker.stream), nonFinal, "fpp");
     500            PabloAST * starts = pb.createAnd(mInitial, marker.stream);
     501            marker.stream = pb.createScanThru(starts, nonFinal, "fpp");
    485502            marker.pos = MarkerPosition::FinalPostPositionUnit;
    486503        }
Note: See TracChangeset for help on using the changeset viewer.