Ignore:
Timestamp:
Nov 16, 2018, 2:19:20 PM (6 months ago)
Author:
cameron
Message:

Remove Unicode calculations from byte-length repetitions

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r6170 r6201  
    474474            // MatchStar deposits any cursors on the post position. However those cursors may land on the initial "byte" of a
    475475            // "multi-byte" character. Combine the masked range with any nonFinals.
    476             PabloAST * bounded = pb.createMatchStar(cursor, pb.createOr(masked, u8NonFinal(pb)), "bounded");
    477             return makeMarker(FinalPostPositionUnit, bounded);
     476            PabloAST * bounded = pb.createMatchStar(cursor, masked, "bounded");
     477            return makeMarker(InitialPostPositionUnit, bounded);
    478478        }
    479479        else if (isUnicodeUnitLength(repeated)) {
     
    526526        PabloAST * mask = markerVar(compile(repeated, pb));
    527527        // The post position character may land on the initial byte of a multi-byte character. Combine them with the masked range.
    528         mask = pb.createOr(mask, u8NonFinal(pb));
    529528        PabloAST * unbounded = pb.createMatchStar(base, mask, "unbounded");
    530         return makeMarker(FinalPostPositionUnit, unbounded);
     529        return makeMarker(InitialPostPositionUnit, unbounded);
    531530    } else if (isUnicodeUnitLength(repeated) && LLVM_LIKELY(!AlgorithmOptionIsSet(DisableMatchStar) && !AlgorithmOptionIsSet(DisableUnicodeMatchStar))) {
    532531        PabloAST * mask = markerVar(compile(repeated, pb));
Note: See TracChangeset for help on using the changeset viewer.