Changeset 4340 for icGREP/icgrep-devel


Ignore:
Timestamp:
Dec 15, 2014, 7:08:59 PM (5 years ago)
Author:
cameron
Message:

Compiler can produce either FinalByte or PostPosition markers; allows subexpression compilation

Location:
icGREP/icgrep-devel
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/QA/greptest.xml

    r4338 r4340  
    353353</datafile>
    354354
    355 <datafile id = "CRLF">line with CRLF &#13;&#10;two lines with LFCR &#10;&#13;final line
     355<datafile id = "CRLF">line with CRLF &#13;&#10;two lines with LFCR &#10;&#13;final line 
    356356</datafile>
    357357 <grepcase regexp="^$" datafile="CRLF" grepcount="1"/>
     
    433433 奜
    434434 A plain line.
    435  </datafile>
     435</datafile>
    436436 <grepcase regexp="[\u{1234}-\u{1245}]" datafile="codepoints" grepcount="2"/>
    437437 <grepcase regexp="[\u{086}-\u{9A}]" datafile="codepoints" grepcount="1"/>
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4337 r4340  
    2323#include <stdexcept>
    2424
     25using namespace pablo;
     26
     27namespace re {
     28
     29MarkerType makePostPositionMarker(std::string marker_name, PabloAST * s, PabloBlock & pb) {
     30    return MarkerType{PostPosition, pb.createAssign(marker_name, s)};
     31}
     32   
     33MarkerType wrapPostPositionMarker(Assign * s) {
     34    return MarkerType{PostPosition, s};
     35}
     36   
     37MarkerType makeFinalPositionMarker(std::string marker_name, PabloAST * s, PabloBlock & pb) {
     38    return MarkerType{FinalByte, pb.createAssign(marker_name, s)};
     39}
     40
     41Assign * markerStream(MarkerType m, PabloBlock & pb) {
     42    return m.stream;
     43}
     44
     45Var * markerVar(MarkerType m, PabloBlock & pb) {
     46    return pb.createVar(m.stream);
     47}
     48
     49Var * postPositionVar(MarkerType m, PabloBlock & pb) {
     50    if (isFinalPositionMarker(m)) return pb.createVar(pb.createAssign("f", pb.createAdvance(pb.createVar(m.stream), 1)));
     51    else return pb.createVar(m.stream);
     52}
     53
    2554//Set the 'internal.nonfinal' bit stream for the utf-8 multi-byte encoding.
    2655//#define USE_IF_FOR_NONFINAL
    2756
    28 using namespace pablo;
    29 
    30 namespace re {
     57
    3158
    3259RE_Compiler::RE_Compiler(PabloBlock & baseCG, const cc::CC_NameMap & nameMap)
     
    4168
    4269//#define USE_IF_FOR_NONFINAL 1
     70#define UNICODE_LINE_BREAK false
    4371
    4472   
     
    4674
    4775    mLineFeed = ccc.compileCC(makeCC(0x0A));
     76    PabloAST * CR = ccc.compileCC(makeCC(0x0D));
     77    PabloAST * LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
     78    PabloAST * NEL = mCG.createAnd(mCG.createAdvance(ccc.compileCC(makeCC(0xC2)), 1), ccc.compileCC(makeCC(0x85)));
     79    PabloAST * E2_80 = mCG.createAnd(mCG.createAdvance(ccc.compileCC(makeCC(0xE2)), 1), ccc.compileCC(makeCC(0x80)));
     80    PabloAST * LS_PS = mCG.createAnd(mCG.createAdvance(E2_80, 1), ccc.compileCC(makeCC(0xA8,0xA9)));
     81    PabloAST * LB_chars = mCG.createOr(LF_VT_FF_CR, mCG.createOr(NEL, LS_PS));
     82    mCRLF = mCG.createAnd(mCG.createAdvance(CR, 1), mLineFeed);
     83    mUnicodeLineBreak = mCG.createAnd(LB_chars, mCG.createNot(mCRLF));  // count the CR, but not CRLF
    4884    PabloAST * u8single = ccc.compileCC(makeCC(0x00, 0x7F));
    4985    PabloAST * u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF));
     
    71107}
    72108
    73 void RE_Compiler::finalizeMatchResult(Assign * match_result) {
     109void RE_Compiler::finalizeMatchResult(MarkerType match_result) {
    74110    //These three lines are specifically for grep.
    75     mCG.createAssign("matches", mCG.createAnd(mCG.createMatchStar(mCG.createVar(match_result), mCG.createNot(mLineFeed)), mLineFeed), 0);
    76     mCG.createAssign("lf", mLineFeed, 1);
    77 }
    78    
    79 Assign * RE_Compiler::compile(RE * re, PabloBlock & pb) {
    80         return process(re, pb.createAssign("start", pb.createOnes()), pb);
     111    PabloAST * lb = UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed;
     112    mCG.createAssign("matches", mCG.createAnd(mCG.createMatchStar(markerVar(match_result, mCG), mCG.createNot(lb)), lb), 0);
     113    mCG.createAssign("lf", lb, 1);//mCG.createAnd(lb, mCG.createNot(mCRLF)), 1);
     114}
     115   
     116MarkerType RE_Compiler::compile(RE * re, PabloBlock & pb) {
     117        return process(re, makePostPositionMarker("start", pb.createOnes(), pb), pb);
    81118}
    82119       
     
    89126        if (var == nullptr) {
    90127            RE * def = name->getDefinition();
    91             assert(!isa<CC>(def));  //  Names mapping to CCs should have been compiled.
    92             assert(name->getType == Name::Type::Unicode);  //
    93             // compile in top-level block
    94             name -> setCompiled(pb.createVar(compile(def, mCG)));
    95         }
     128                assert(!isa<CC>(def));  //  Names mapping to CCs should have been compiled.
     129                assert(name->getType == Name::Type::Unicode);  //
     130                // compile in top-level block
     131                MarkerType m = compile(def, mCG);
     132                assert(isFinalPositionMarker(m));
     133                name -> setCompiled(pb.createVar(markerStream(m, mCG)));
     134        }
    96135        return name->getCompiled();
    97136    }
    98137}
    99138
    100 Assign * RE_Compiler::process(RE * re, Assign * marker, PabloBlock & pb) {
     139PabloAST * RE_Compiler::nextUnicodePosition(MarkerType m, PabloBlock & pb) {
     140    if (isPostPositionMarker(m)) {
     141        return pb.createScanThru(pb.createVar(pb.createAnd(mInitial, markerVar(m, pb))), mNonFinal);
     142    }
     143    else {
     144        //return pb.createAdvanceThenScanThru(pb.createVar(markerVar(m), pb), mNonFinal);
     145        return pb.createScanThru(pb.createAnd(mInitial, pb.createAdvance(pb.createVar(markerVar(m, pb)), 1)), mNonFinal);
     146       
     147    }
     148}
     149   
     150
     151MarkerType RE_Compiler::process(RE * re, MarkerType marker, PabloBlock & pb) {
    101152    if (Name * name = dyn_cast<Name>(re)) {
    102         marker = process(name, marker, pb);
     153        return process(name, marker, pb);
    103154    }
    104155    else if (Seq* seq = dyn_cast<Seq>(re)) {
    105         marker = process(seq, marker, pb);
     156        return process(seq, marker, pb);
    106157    }
    107158    else if (Alt * alt = dyn_cast<Alt>(re)) {
    108         marker = process(alt, marker, pb);
     159        return process(alt, marker, pb);
    109160    }
    110161    else if (Rep * rep = dyn_cast<Rep>(re)) {
    111         marker = process(rep, marker, pb);
     162        return process(rep, marker, pb);
    112163    }
    113164    else if (isa<Any>(re)) {
    114         // Move the markers forward through any nonfinal UTF-8 bytes to the final position of each character.
    115         PabloAST * markerVar = pb.createVar(marker);
    116         markerVar = pb.createAnd(markerVar, mInitial);
    117         markerVar = pb.createScanThru(markerVar, mNonFinal);
    118         PabloAST * dot = pb.createNot(mLineFeed);
    119         marker = pb.createAssign("dot", pb.createAdvance(pb.createAnd(markerVar, dot), 1));
     165        PabloAST * nextPos = nextUnicodePosition(marker, pb);
     166        PabloAST * dot = pb.createNot(UNICODE_LINE_BREAK ? pb.createOr(mUnicodeLineBreak, mCRLF) : mLineFeed);
     167        return makeFinalPositionMarker("dot", pb.createAnd(nextPos, dot), pb);
    120168    }
    121169    else if (Diff * diff = dyn_cast<Diff>(re)) {
    122         marker = process(diff, marker, pb);
     170        return process(diff, marker, pb);
    123171    }
    124172    else if (Intersect * ix = dyn_cast<Intersect>(re)) {
    125         marker = process(ix, marker, pb);
     173        return process(ix, marker, pb);
    126174    }
    127175    else if (isa<Start>(re)) {
    128         PabloAST * const sol = pb.createNot(pb.createAdvance(pb.createNot(mLineFeed), 1));
    129         marker = pb.createAssign("sol", pb.createAnd(pb.createVar(marker), sol));
     176        if (UNICODE_LINE_BREAK) {
     177            PabloAST * line_end = mCG.createOr(mUnicodeLineBreak, mCRLF);
     178            PabloAST * sol = pb.createNot(pb.createOr(pb.createAdvance(pb.createNot(line_end), 1), mCRLF));
     179            return makePostPositionMarker("sol", pb.createAnd(postPositionVar(marker, pb), sol), pb);
     180        }
     181        else {
     182            PabloAST * sol = pb.createNot(pb.createAdvance(pb.createNot(mLineFeed), 1));
     183            return makePostPositionMarker("sol", pb.createAnd(postPositionVar(marker, pb), sol), pb);
     184        }
    130185    }
    131186    else if (isa<End>(re)) {
    132         marker = pb.createAssign("eol", pb.createAnd(pb.createVar(marker), mLineFeed));
     187        if (UNICODE_LINE_BREAK) {
     188            // We would have to advance to the end of the Unicode LB category,
     189            // but that violates our marker assumption (a third marker type: atNextFinal???)
     190            throw std::runtime_error("Unsupported: $ with Unicode line break");
     191        }
     192        PabloAST * nextPos = postPositionVar(marker, pb);  // For LF match
     193        return makePostPositionMarker("eol", pb.createAnd(nextPos, mLineFeed), pb);
    133194    }
    134195    return marker;
    135196}
    136197
    137 inline Assign * RE_Compiler::process(Name * name, Assign * marker, PabloBlock & pb) {
    138     PabloAST * markerVar = pb.createVar(marker);
    139     if (name->getType() != Name::Type::Byte) {
    140         // Move the markers forward through any nonfinal UTF-8 bytes to the final position of each character.
    141         markerVar = pb.createAnd(markerVar, mInitial);
    142         markerVar = pb.createScanThru(markerVar, mNonFinal);
    143     }
    144     return pb.createAssign("m", pb.createAdvance(pb.createAnd(character_class_strm(name, pb), markerVar), 1));
    145 }
    146 
    147 inline Assign * RE_Compiler::process(Seq * seq, Assign * marker, PabloBlock & pb) {
     198MarkerType RE_Compiler::process(Name * name, MarkerType marker, PabloBlock & pb) {
     199    PabloAST * nextPos = (name->getType() == Name::Type::Byte) ? postPositionVar(marker, pb): nextUnicodePosition(marker, pb);
     200    return makeFinalPositionMarker("m", pb.createAnd(nextPos, character_class_strm(name, pb)), pb);
     201}
     202
     203MarkerType RE_Compiler::process(Seq * seq, MarkerType marker, PabloBlock & pb) {
    148204    for (RE * re : *seq) {
    149205        marker = process(re, marker, pb);
     
    152208}
    153209
    154 inline Assign * RE_Compiler::process(Alt * alt, Assign * marker, PabloBlock & pb) {
    155     if (alt->empty()) {
    156         marker = pb.createAssign("fail", pb.createZeroes()); // always fail (note: I'm not sure this ever occurs. How do I create a 0-element alternation?)
     210MarkerType RE_Compiler::process(Alt * alt, MarkerType marker, PabloBlock & pb) {
     211    PabloAST * atPosnAccumulator = nullptr;
     212    PabloAST * postPosnAccumulator = nullptr;
     213    MarkerType const base = marker;
     214    // The following may be useful to force a common Advance rather than separate
     215    // Advances in each alternative.
     216    // MarkerType const base = makePostPositionMarker(postPositionVar(marker, pb), pb);
     217    for (RE * re : *alt) {
     218        MarkerType rslt = process(re, base, pb);
     219        PabloAST * rsltStream = markerVar(rslt, pb);
     220        if (isFinalPositionMarker(rslt)) {
     221            atPosnAccumulator = (atPosnAccumulator == nullptr) ? rsltStream : pb.createOr(atPosnAccumulator, rsltStream);
     222        }
     223        else {
     224            postPosnAccumulator = (postPosnAccumulator == nullptr) ? rsltStream : pb.createOr(postPosnAccumulator, rsltStream);
     225        }
     226    }
     227    if (postPosnAccumulator == nullptr) {
     228        return makeFinalPositionMarker("alt", atPosnAccumulator == nullptr ? pb.createZeroes() : atPosnAccumulator, pb);
    157229    }
    158230    else {
    159         auto i = alt->begin();
    160         Assign * const base = marker;
    161         marker = process(*i, base, pb);
    162         while (++i != alt->end()) {
    163             Assign * other = process(*i, base, pb);
    164             marker = pb.createAssign("alt", pb.createOr(pb.createVar(marker), pb.createVar(other)));
    165         }
    166     }   
    167     return marker;
    168 }
    169 
    170 inline Assign * RE_Compiler::process(Diff * diff, Assign * marker, PabloBlock & pb) {
     231        if (atPosnAccumulator != nullptr) {
     232            postPosnAccumulator = pb.createOr(postPosnAccumulator, pb.createAdvance(atPosnAccumulator, 1));
     233        }
     234        return makePostPositionMarker("alt", postPosnAccumulator, pb);
     235    }
     236}
     237
     238MarkerType RE_Compiler::process(Diff * diff, MarkerType marker, PabloBlock & pb) {
    171239    RE * lh = diff->getLH();
    172240    RE * rh = diff->getRH();
    173241    if ((isa<Any>(lh) || isa<Name>(lh)) && (isa<Any>(rh) || isa<Name>(rh))) {
    174         Assign * t1 = process(lh, marker, pb);
    175         Assign * t2 = process(rh, marker, pb);
    176         return pb.createAssign("diff", pb.createAnd(pb.createVar(t1), pb.createNot(pb.createVar(t2))));
     242        MarkerType t1 = process(lh, marker, pb);
     243        MarkerType t2 = process(rh, marker, pb);
     244        assert(isFinalPositionMarker(t1) && isFinalPositionMarker(t2));
     245        return makeFinalPositionMarker("diff", pb.createAnd(markerVar(t1, pb), pb.createNot(markerVar(t2, pb))), pb);
    177246    }
    178247    throw std::runtime_error("Unsupported Diff operands: " + Printer_RE::PrintRE(diff));
    179248}
    180249
    181 inline Assign * RE_Compiler::process(Intersect * x, Assign * marker, PabloBlock & pb) {
     250MarkerType RE_Compiler::process(Intersect * x, MarkerType marker, PabloBlock & pb) {
    182251    RE * lh = x->getLH();
    183252    RE * rh = x->getRH();
    184253    if ((isa<Any>(lh) || isa<Name>(lh)) && (isa<Any>(rh) || isa<Name>(rh))) {
    185         Assign * t1 = process(lh, marker, pb);
    186         Assign * t2 = process(rh, marker, pb);
    187         return pb.createAssign("intersect", pb.createAnd(pb.createVar(t1), pb.createVar(t2)));
     254        MarkerType t1 = process(lh, marker, pb);
     255        MarkerType t2 = process(rh, marker, pb);
     256        assert(isFinalPositionMarker(t1) && isFinalPositionMarker(t2));
     257        return makeFinalPositionMarker("intersect", pb.createAnd(markerVar(t1, pb), markerVar(t2, pb)), pb);
    188258    }
    189259    throw std::runtime_error("Unsupported Intersect operands: " + Printer_RE::PrintRE(x));
    190260}
    191261
    192 inline Assign * RE_Compiler::process(Rep * rep, Assign * marker, PabloBlock & pb) {
     262MarkerType RE_Compiler::process(Rep * rep, MarkerType marker, PabloBlock & pb) {
    193263    int lb = rep->getLB();
    194264    int ub = rep->getUB();
     
    197267    }
    198268    if (ub == Rep::UNBOUNDED_REP) {
    199         marker = processUnboundedRep(rep->getRE(), marker, pb);
     269        return processUnboundedRep(rep->getRE(), marker, pb);
    200270    }
    201271    else { // if (rep->getUB() != Rep::UNBOUNDED_REP)
    202         marker = processBoundedRep(rep->getRE(), ub - lb, marker, pb);
     272        return processBoundedRep(rep->getRE(), ub - lb, marker, pb);
    203273    }   
    204     return marker;
    205 }
    206 
     274}
    207275
    208276/*
     
    213281       
    214282inline Assign * RE_Compiler::consecutive(Assign * repeated, int repeated_lgth, int repeat_count, pablo::PabloBlock & pb) {
    215         int i = repeated_lgth;
    216         int total_lgth = repeat_count * repeated_lgth;
    217         Assign * consecutive_i = repeated;
    218         while (i * 2 < total_lgth) {
     283        int i = repeated_lgth;
     284        int total_lgth = repeat_count * repeated_lgth;
     285        Assign * consecutive_i = repeated;
     286        while (i * 2 < total_lgth) {
    219287        PabloAST * v = pb.createVar(consecutive_i);
    220                 consecutive_i = pb.createAssign("consecutive", pb.createAnd(v, pb.createAdvance(v, i)));
    221                 i *= 2;
    222         }
    223         if (i < total_lgth) {
     288                consecutive_i = pb.createAssign("consecutive", pb.createAnd(v, pb.createAdvance(v, i)));
     289                i *= 2;
     290        }
     291        if (i < total_lgth) {
    224292        PabloAST * v = pb.createVar(consecutive_i);
    225                 consecutive_i = pb.createAssign("consecutive", pb.createAnd(v, pb.createAdvance(v, total_lgth - i)));
    226         }
    227         return consecutive_i;
     293                consecutive_i = pb.createAssign("consecutive", pb.createAnd(v, pb.createAdvance(v, total_lgth - i)));
     294        }
     295        return consecutive_i;
    228296}
    229297               
     
    232300}
    233301
    234 inline Assign * RE_Compiler::processLowerBound(RE * repeated, int lb, Assign * marker, PabloBlock & pb) {
    235         if (isFixedLength(repeated)) {
     302MarkerType RE_Compiler::processLowerBound(RE * repeated, int lb, MarkerType marker, PabloBlock & pb) {
     303    if (isFixedLength(repeated)) {
    236304        Name * name = cast<Name>(repeated);
    237         Assign * cc_lb = consecutive(pb.createAssign("repeated", pb.createAdvance(name->getCompiled(), 1)), 1, lb, pb);
    238                 return pb.createAssign("lowerbound", pb.createAnd(pb.createAdvance(pb.createVar(marker), lb), pb.createVar(cc_lb)));
    239         }
    240         // Fall through to general case.
    241         while (lb-- != 0) {
    242                 marker = process(repeated, marker, pb);
    243         }
    244         return marker;
    245 }
    246 
    247 inline Assign * RE_Compiler::processBoundedRep(RE * repeated, int ub, Assign * marker, PabloBlock & pb) {
    248         if (isFixedLength(repeated)) {
    249                 // log2 upper bound for fixed length (=1) class
    250                 // Mask out any positions that are more than ub positions from a current match.
    251                 // Use matchstar, then apply filter.
    252                 Assign * nonMatch = pb.createAssign("nonmatch", pb.createNot(pb.createVar(marker)));
    253                 PabloAST * upperLimitMask = pb.createNot(pb.createVar(consecutive(nonMatch, 1, ub + 1, pb)));
     305        Assign * cc_lb = consecutive(pb.createAssign("repeated", pb.createAdvance(name->getCompiled(),1)), 1, lb, pb);
     306        PabloAST * marker_fwd = pb.createAdvance(markerVar(marker, pb), isFinalPositionMarker(marker) ? lb+ 1 : lb);
     307        return makePostPositionMarker("lowerbound", pb.createAnd(marker_fwd, pb.createVar(cc_lb)), pb);
     308    }
     309    // Fall through to general case.
     310    while (lb-- != 0) {
     311        marker = process(repeated, marker, pb);
     312    }
     313    return marker;
     314}
     315
     316MarkerType RE_Compiler::processBoundedRep(RE * repeated, int ub, MarkerType marker, PabloBlock & pb) {
     317    if (isFixedLength(repeated)) {
     318        // log2 upper bound for fixed length (=1) class
     319        // Mask out any positions that are more than ub positions from a current match.
     320        // Use matchstar, then apply filter.
     321        Assign * nonMatch = pb.createAssign("nonmatch", pb.createNot(postPositionVar(marker, pb)));
     322        PabloAST * upperLimitMask = pb.createNot(pb.createVar(consecutive(nonMatch, 1, ub + 1, pb)));
    254323        PabloAST * rep_class_var = cast<Name>(repeated)->getCompiled();
    255         return pb.createAssign("bounded", pb.createAnd(pb.createMatchStar(pb.createVar(marker), rep_class_var), upperLimitMask));
    256         }
    257         // Fall through to general case.
    258         while (ub-- != 0) {
    259                 Assign * alt = process(repeated, marker, pb);
    260                 marker = pb.createAssign("m", pb.createOr(pb.createVar(marker), pb.createVar(alt)));
    261         }
    262         return marker;
    263 }
    264 
    265 inline Assign * RE_Compiler::processUnboundedRep(RE * repeated, Assign * marker, PabloBlock & pb) {
    266 
    267     PabloAST * unbounded = nullptr;
    268 
     324        return makePostPositionMarker("bounded", pb.createAnd(pb.createMatchStar(postPositionVar(marker, pb), rep_class_var), upperLimitMask), pb);
     325    }
     326    // Fall through to general case.
     327    while (ub-- != 0) {
     328        MarkerType a = process(repeated, marker, pb);
     329        if (isFinalPositionMarker(a) && isFinalPositionMarker(marker)) {
     330            marker = makeFinalPositionMarker("m", pb.createOr(markerVar(marker, pb), markerVar(a, pb)), pb);
     331        }
     332        else {
     333            marker = makePostPositionMarker("m", pb.createOr(postPositionVar(marker, pb), postPositionVar(a, pb)), pb);
     334        }
     335    }
     336    return marker;
     337}
     338
     339MarkerType RE_Compiler::processUnboundedRep(RE * repeated, MarkerType marker, PabloBlock & pb) {
     340    // always use PostPosition markers for unbounded repetition.
     341    PabloAST * base = postPositionVar(marker, pb);
     342   
    269343    if (isa<Name>(repeated)) {
    270344        Name * name = cast<Name>(repeated);
    271 
    272345        PabloAST * cc = character_class_strm(name, pb);
    273 
    274         unbounded = pb.createVar(marker);
    275346        if (name->getType() == Name::Type::Byte) {
    276             unbounded = pb.createMatchStar(unbounded, cc);
     347            return makePostPositionMarker("unbounded", pb.createMatchStar(base, cc), pb);
    277348        }
    278349        else { // Name::Unicode and Name::UnicodeCategory
    279             unbounded = pb.createAnd(pb.createMatchStar(unbounded, pb.createOr(mNonFinal, cc)), mInitial);
     350            return makePostPositionMarker("unbounded", pb.createAnd(pb.createMatchStar(base, pb.createOr(mNonFinal, cc)), mInitial), pb);
    280351        }       
    281352    }
    282353    else if (isa<Any>(repeated)) {
    283         PabloAST * dot = pb.createNot(mLineFeed);
    284         unbounded = pb.createVar(marker);
    285         unbounded = pb.createAnd(pb.createMatchStar(unbounded, pb.createOr(mNonFinal, dot)), mInitial);
     354        PabloAST * dot = pb.createNot(UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed);
     355        return makePostPositionMarker("unbounded", pb.createAnd(pb.createMatchStar(base, pb.createOr(mNonFinal, dot)), mInitial), pb);
    286356    }
    287357    else if (isa<Diff>(repeated) && isa<Any>(cast<Diff>(repeated)->getLH()) && isa<Name>(cast<Diff>(repeated)->getRH())) {
    288358        Name * name = cast<Name>(cast<Diff>(repeated)->getRH());
    289359        PabloAST * cc = pb.createNot(pb.createOr(character_class_strm(name, pb), mLineFeed));
    290         unbounded = pb.createVar(marker);
    291         unbounded = pb.createAnd(pb.createMatchStar(unbounded, pb.createOr(mNonFinal, cc)), mInitial);
     360        return makePostPositionMarker("unbounded", pb.createAnd(pb.createMatchStar(base, pb.createOr(mNonFinal, cc)), mInitial), pb);
    292361    }
    293362    else {
    294         Var * markerVar = pb.createVar(marker);
    295         Assign * whileTest = pb.createAssign("test", markerVar);
    296         Assign * whileAccum = pb.createAssign("accum", markerVar);
    297 
    298         PabloBlock wb(pb);
    299 
    300         Var * loopComputation = wb.createVar(process(repeated, whileTest, wb));
     363        Assign * whileTest = pb.createAssign("test", base);
     364        Assign * whileAccum = pb.createAssign("accum", base);
     365
     366        PabloBlock wb(pb);
     367
     368        Var * loopComputation = postPositionVar(process(repeated, wrapPostPositionMarker(whileTest), wb), wb);
    301369
    302370        Var * whileAccumVar = wb.createVar(whileAccum);
     
    308376        pb.createWhile(wb.createVar(nextWhileTest), std::move(wb));
    309377
    310         unbounded = whileAccumVar;
     378        return makePostPositionMarker("unbounded", whileAccumVar, pb);
    311379    }   
    312     return pb.createAssign("unbounded", unbounded);
    313 }
    314 
    315380} // end of namespace re
     381}
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r4334 r4340  
    2626}
    2727
     28/*  Marker streams represent the results of matching steps.
     29    Two types of marker streams are used internally.
     30    FinalByte markers are used for character classes and
     31    other strings, with a 1 bit at their final position.
     32    PostPosition markers are used to mark matches with
     33    a 1 bit immediately after a match.   PostPosition markers
     34    are generally required whenever a regular expression element
     35    can match the empty string (e.g., * and ? repeated items).
     36*/
     37   
    2838namespace re {
     39
     40enum MarkerPosition {FinalByte, PostPosition};
     41
     42struct MarkerType {
     43    MarkerPosition pos;
     44    pablo::Assign * stream;
     45};
     46
     47inline bool isPostPositionMarker(MarkerType m) {
     48    return m.pos == PostPosition;
     49}
     50
     51inline bool isFinalPositionMarker(MarkerType m) {
     52    return m.pos == FinalByte;
     53}
     54
     55MarkerType makePostPositionMarker(std::string marker_name, pablo::PabloAST * s, pablo::PabloBlock & pb);
     56
     57MarkerType makeFinalPositionMarker(std::string marker_name, pablo::PabloAST * s, pablo::PabloBlock & pb);
     58
     59pablo::Assign * markerStream(MarkerType m, pablo::PabloBlock & pb);
     60
     61pablo::Var * markerVar(MarkerType m, pablo::PabloBlock & pb);
     62
     63pablo::Var * postPositionVar(MarkerType m, pablo::PabloBlock & pb);
     64
    2965class RE_Compiler {
    3066public:
     
    3268    RE_Compiler(pablo::PabloBlock & baseCG, const cc::CC_NameMap & nameMap);
    3369    void initializeRequiredStreams(cc::CC_Compiler & ccc);
    34     void finalizeMatchResult(pablo::Assign * match_result);
    35     pablo::Assign * compile(RE * re) {
     70    void finalizeMatchResult(MarkerType match_result);
     71    MarkerType compile(RE * re) {
    3672        return compile(re, mCG);
    3773    }
     
    3975private:
    4076
    41     pablo::Assign * compile(RE * re, pablo::PabloBlock & cg);
     77    MarkerType compile(RE * re, pablo::PabloBlock & cg);
    4278
    4379    pablo::PabloAST * character_class_strm(Name * name, pablo::PabloBlock & pb);
    44     pablo::Assign * process(RE * re, pablo::Assign *marker, pablo::PabloBlock & pb);
    45     pablo::Assign * process(Name * name, pablo::Assign * marker, pablo::PabloBlock & pb);
    46     pablo::Assign * process(Seq * seq, pablo::Assign * marker, pablo::PabloBlock & pb);
    47     pablo::Assign * process(Alt * alt, pablo::Assign * marker, pablo::PabloBlock & pb);
    48     pablo::Assign * process(Rep * rep, pablo::Assign *marker, pablo::PabloBlock & pb);
    49     pablo::Assign * process(Diff * diff, pablo::Assign * marker, pablo::PabloBlock & cg);
    50     pablo::Assign * process(Intersect * x, pablo::Assign * marker, pablo::PabloBlock & cg);
     80    pablo::PabloAST * nextUnicodePosition(MarkerType m, pablo::PabloBlock & pb);
     81    MarkerType process(RE * re, MarkerType marker, pablo::PabloBlock & pb);
     82    MarkerType process(Name * name, MarkerType marker, pablo::PabloBlock & pb);
     83    MarkerType process(Seq * seq, MarkerType marker, pablo::PabloBlock & pb);
     84    MarkerType process(Alt * alt, MarkerType marker, pablo::PabloBlock & pb);
     85    MarkerType process(Rep * rep, MarkerType marker, pablo::PabloBlock & pb);
     86    MarkerType process(Diff * diff, MarkerType marker, pablo::PabloBlock & cg);
     87    MarkerType process(Intersect * x, MarkerType marker, pablo::PabloBlock & cg);
    5188    pablo::Assign * consecutive(pablo::Assign * repeated,  int repeated_lgth, int repeat_count, pablo::PabloBlock & pb);
    5289    static bool isFixedLength(RE * regexp);
    53     pablo::Assign * processLowerBound(RE * repeated,  int lb, pablo::Assign * marker, pablo::PabloBlock & pb);
    54     pablo::Assign * processUnboundedRep(RE * repeated, pablo::Assign * marker, pablo::PabloBlock & pb);
    55     pablo::Assign * processBoundedRep(RE * repeated, int ub, pablo::Assign * marker, pablo::PabloBlock & pb);
     90    MarkerType processLowerBound(RE * repeated,  int lb, MarkerType marker, pablo::PabloBlock & pb);
     91    MarkerType processUnboundedRep(RE * repeated, MarkerType marker, pablo::PabloBlock & pb);
     92    MarkerType processBoundedRep(RE * repeated, int ub, MarkerType marker, pablo::PabloBlock & pb);
    5693
    5794    pablo::PabloBlock &                             mCG;
    5895    const cc::CC_NameMap &                          mNameMap;
    5996    pablo::Var *                                    mLineFeed;
     97    pablo::PabloAST *                               mCRLF;
     98    pablo::PabloAST *                               mUnicodeLineBreak;
    6099    pablo::PabloAST *                               mInitial;
    61100    pablo::PabloAST *                               mNonFinal;   
Note: See TracChangeset for help on using the changeset viewer.