source: icGREP/icgrep-devel/icgrep/re/re_compiler.cpp @ 4544

Last change on this file since 4544 was 4544, checked in by cameron, 4 years ago

Tracing options; make all command line options static

File size: 21.8 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "re_compiler.h"
8//Regular Expressions
9#include <re/re_name.h>
10#include <re/re_any.h>
11#include <re/re_start.h>
12#include <re/re_end.h>
13#include <re/re_alt.h>
14#include <re/re_cc.h>
15#include <re/re_seq.h>
16#include <re/re_rep.h>
17#include <re/re_diff.h>
18#include <re/re_intersect.h>
19#include <re/re_assertion.h>
20#include <re/re_analysis.h>
21#include <cc/cc_namemap.hpp>
22#include <pablo/codegenstate.h>
23
24#include <assert.h>
25#include <stdexcept>
26
27#include "llvm/Support/CommandLine.h"
28static cl::OptionCategory fREcompilationOptions("Regex Compilation Options",
29                                      "These options control the compilation of regular expressions to Pablo.");
30
31static cl::opt<bool> DisableLog2BoundedRepetition("disable-log2-bounded-repetition", cl::init(false), 
32                     cl::desc("disable log2 optimizations for bounded repetition of bytes"), cl::cat(fREcompilationOptions));
33static cl::opt<int> IfInsertionGap("if-insertion-gap", cl::init(3), cl::desc("minimum number of nonempty elements between inserted if short-circuit tests"), cl::cat(fREcompilationOptions));
34static cl::opt<bool> DisableMatchStar("disable-matchstar", cl::init(false), 
35                     cl::desc("disable MatchStar optimization"), cl::cat(fREcompilationOptions));
36static cl::opt<bool> DisableUnicodeMatchStar("disable-Unicode-matchstar", cl::init(false), 
37                     cl::desc("disable Unicode MatchStar optimization"), cl::cat(fREcompilationOptions));
38
39static cl::opt<bool> DisableUnicodeLineBreak("disable-Unicode-linebreak", cl::init(false), 
40                     cl::desc("disable Unicode line breaks - use LF only"), cl::cat(fREcompilationOptions));
41
42
43using namespace pablo;
44
45namespace re {
46
47
48RE_Compiler::RE_Compiler(PabloBlock & baseCG)
49: mPB(baseCG)
50, mLineFeed(nullptr)
51, mInitial(nullptr)
52, mNonFinal(nullptr)
53, mFinal(nullptr)
54, mStarDepth(0)
55{
56
57}
58
59   
60MarkerType RE_Compiler::AdvanceMarker(MarkerType m, MarkerPosition newpos, PabloBlock & pb) {
61    if (m.pos == newpos) return m;
62    PabloAST * a = m.stream;
63    if (m.pos == FinalMatchByte) {
64        // Must advance at least to InitialPostPositionByte
65        a = pb.createAdvance(a, 1, "adv");
66    }
67    // Now at InitialPostPositionByte; is a further advance needed?
68    if (newpos == FinalPostPositionByte) {
69        // Must advance through nonfinal bytes
70        a = pb.createScanThru(pb.createAnd(mInitial, a), mNonFinal, "scanToFinal");
71    }
72    return {newpos, a};
73}
74
75void RE_Compiler::AlignMarkers(MarkerType & m1, MarkerType & m2, PabloBlock & pb) {
76    if (m1.pos < m2.pos) {
77        m1 = AdvanceMarker(m1, m2.pos, pb); 
78    }
79    else if (m2.pos < m1.pos) {
80        m2 = AdvanceMarker(m2, m1.pos, pb); 
81    }
82}
83
84#define UNICODE_LINE_BREAK (!DisableUnicodeLineBreak)
85
86
87void RE_Compiler::initializeRequiredStreams(cc::CC_Compiler & ccc) {
88
89    Assign * LF = mPB.createAssign("LF", ccc.compileCC(makeCC(0x0A)));
90    mLineFeed = LF;
91    PabloAST * CR = ccc.compileCC(makeCC(0x0D));
92    PabloAST * LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
93
94    PabloBlock & crb = PabloBlock::Create(mPB);
95    PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
96    Assign * acrlf = crb.createAssign("crlf", crb.createAnd(cr1, LF));
97    mPB.createIf(CR, {acrlf}, crb);
98    mCRLF = acrlf;
99
100    PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
101    PabloBlock & it = PabloBlock::Create(mPB);
102    PabloAST * u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
103    PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
104    PabloAST * u8pfx4 = ccc.compileCC(makeCC(0xF0, 0xF4), it);
105    Assign * u8suffix = it.createAssign("u8suffix", ccc.compileCC(makeCC(0x80, 0xBF)));
106   
107    //
108    // Two-byte sequences
109    PabloBlock & it2 = PabloBlock::Create(it);
110    Assign * u8scope22 = it2.createAssign("u8scope22", it2.createAdvance(u8pfx2, 1));
111    Assign * NEL = it2.createAssign("NEL", it2.createAnd(it2.createAdvance(ccc.compileCC(makeCC(0xC2), it2), 1), ccc.compileCC(makeCC(0x85), it2)));
112    it.createIf(u8pfx2, {u8scope22, NEL}, it2);
113   
114    //
115    // Three-byte sequences
116    PabloBlock & it3 = PabloBlock::Create(it);
117    Assign * u8scope32 = it3.createAssign("u8scope32", it3.createAdvance(u8pfx3, 1));
118    PabloAST * u8scope33 = it3.createAdvance(u8pfx3, 2);
119    Assign * u8scope3X = it3.createAssign("u8scope3X", it3.createOr(u8scope32, u8scope33));
120    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE2), it3), 1), ccc.compileCC(makeCC(0x80), it3));
121    Assign * LS_PS = it3.createAssign("LS_PS", it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeCC(0xA8,0xA9), it3)));
122    PabloAST * E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE0), it3), 1), ccc.compileCC(makeCC(0x80, 0x9F), it3));
123    PabloAST * ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xED), it3), 1), ccc.compileCC(makeCC(0xA0, 0xBF), it3));
124    Assign * EX_invalid = it3.createAssign("EX_invalid", it3.createOr(E0_invalid, ED_invalid));
125    it.createIf(u8pfx3, {u8scope32, u8scope3X, LS_PS, EX_invalid}, it3);
126 
127    //
128    // Four-byte sequences
129    PabloBlock & it4 = PabloBlock::Create(it);
130    PabloAST * u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
131    PabloAST * u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
132    PabloAST * u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
133    Assign * u8scope4nonfinal = it4.createAssign("u8scope4nonfinal", it4.createOr(u8scope42, u8scope43));
134    Assign * u8scope4X = it4.createAssign("u8scope4X", it4.createOr(u8scope4nonfinal, u8scope44));
135    PabloAST * F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF0), it4), 1), ccc.compileCC(makeCC(0x80, 0x8F), it4));
136    PabloAST * F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF4), it4), 1), ccc.compileCC(makeCC(0x90, 0xBF), it4));
137    Assign * FX_invalid = it4.createAssign("FX_invalid", it4.createOr(F0_invalid, F4_invalid));
138    it.createIf(u8pfx4, {u8scope4nonfinal, u8scope4X, FX_invalid}, it4);
139
140    //
141    // Invalid cases
142    PabloAST * anyscope = it.createOr(u8scope22, it.createOr(u8scope3X, u8scope4X));
143    PabloAST * legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
144    //  Any scope that does not have a suffix byte, and any suffix byte that is not in
145    //  a scope is a mismatch, i.e., invalid UTF-8.
146    PabloAST * mismatch = it.createXor(anyscope, u8suffix);
147    //
148    PabloAST * EF_invalid = it.createOr(EX_invalid, FX_invalid);
149    PabloAST * pfx_invalid = it.createXor(u8pfx, legalpfx);
150    Assign * u8invalid = it.createAssign("u8invalid", it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
151    Assign * u8valid = it.createAssign("u8valid", it.createNot(u8invalid));
152    //
153    //
154   
155    Assign * valid_pfx = it.createAssign("valid_pfx", it.createAnd(u8pfx, u8valid));
156    mNonFinal = it.createAssign("nonfinal", it.createAnd(it.createOr(it.createOr(u8pfx, u8scope32), u8scope4nonfinal), u8valid));
157   
158    Assign * NEL_LS_PS = it.createAssign("NEL_LS_PS", it.createOr(NEL, LS_PS));
159    mPB.createIf(u8pfx, {u8invalid, valid_pfx, mNonFinal, NEL_LS_PS}, it);
160   
161    PabloAST * LB_chars = mPB.createOr(LF_VT_FF_CR, NEL_LS_PS);
162    PabloAST * u8single = mPB.createAnd(ccc.compileCC(makeCC(0x00, 0x7F)), mPB.createNot(u8invalid));
163    mInitial = mPB.createOr(u8single, valid_pfx, "initial");
164    mFinal = mPB.createNot(mPB.createOr(mNonFinal, u8invalid), "final");
165    mUnicodeLineBreak = mPB.createAnd(LB_chars, mPB.createNot(mCRLF));  // count the CR, but not CRLF
166}
167
168void RE_Compiler::finalizeMatchResult(MarkerType match_result) {
169    //These three lines are specifically for grep.
170    PabloAST * lb = UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed;
171    PabloAST * v = markerVar(match_result);
172    mPB.createAssign("matches", mPB.createAnd(mPB.createMatchStar(v, mPB.createNot(lb)), lb), 0);
173    mPB.createAssign("lf", mPB.createAnd(lb, mPB.createNot(mCRLF)), 1);
174}
175
176MarkerType RE_Compiler::compile(RE * re, PabloBlock & pb) {
177    return process(re, makeMarker(FinalPostPositionByte, pb.createOnes()), pb);
178}
179
180PabloAST * RE_Compiler::nextUnicodePosition(MarkerType m, PabloBlock & pb) {
181    if (markerPos(m) == FinalPostPositionByte) {
182        return markerVar(m);
183    }
184    else if (markerPos(m) == InitialPostPositionByte) {
185        return pb.createScanThru(pb.createAnd(mInitial, markerVar(m)), mNonFinal);
186    }
187    else {
188        return pb.createScanThru(pb.createAnd(mInitial, pb.createAdvance(markerVar(m), 1)), mNonFinal);
189    }
190}
191
192MarkerType RE_Compiler::process(RE * re, MarkerType marker, PabloBlock & pb) {
193    if (Name * name = dyn_cast<Name>(re)) {
194        return process(name, marker, pb);
195    }
196    else if (Seq* seq = dyn_cast<Seq>(re)) {
197        return process(seq, marker, pb);
198    }
199    else if (Alt * alt = dyn_cast<Alt>(re)) {
200        return process(alt, marker, pb);
201    }
202    else if (Rep * rep = dyn_cast<Rep>(re)) {
203        return process(rep, marker, pb);
204    }
205    else if (Assertion * a = dyn_cast<Assertion>(re)) {
206        return process(a, marker, pb);
207    }
208    else if (isa<Any>(re)) {
209        PabloAST * nextPos = nextUnicodePosition(marker, pb);
210        PabloAST * dot = pb.createNot(UNICODE_LINE_BREAK ? pb.createOr(mUnicodeLineBreak, mCRLF) : mLineFeed);
211        return makeMarker(FinalMatchByte, pb.createAnd(nextPos, dot, "dot"));
212    }
213    else if (Diff * diff = dyn_cast<Diff>(re)) {
214        return process(diff, marker, pb);
215    }
216    else if (Intersect * ix = dyn_cast<Intersect>(re)) {
217        return process(ix, marker, pb);
218    }
219    else if (isa<Start>(re)) {
220        MarkerType m = AdvanceMarker(marker, InitialPostPositionByte, pb);
221        if (UNICODE_LINE_BREAK) {
222            PabloAST * line_end = mPB.createOr(mUnicodeLineBreak, mCRLF);
223            PabloAST * sol = pb.createNot(pb.createOr(pb.createAdvance(pb.createNot(line_end), 1), mCRLF));
224            return makeMarker(InitialPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
225        }
226        else {
227            PabloAST * sol = pb.createNot(pb.createAdvance(pb.createNot(mLineFeed), 1));
228            return makeMarker(FinalPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
229        }
230    }
231    else if (isa<End>(re)) {
232        if (UNICODE_LINE_BREAK) {
233            PabloAST * nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionByte, pb));
234            return makeMarker(FinalPostPositionByte, pb.createAnd(nextPos, mUnicodeLineBreak, "end"));
235        }
236        PabloAST * nextPos = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));  // For LF match
237        return makeMarker(FinalPostPositionByte, pb.createAnd(nextPos, mLineFeed, "eol"));
238    }
239    return marker;
240}
241
242MarkerType RE_Compiler::process(Name * name, MarkerType marker, PabloBlock & pb) {
243    MarkerType nextPos;
244    if (markerPos(marker) == FinalPostPositionByte) nextPos = marker;
245    else if (name->getType() == Name::Type::Byte) {
246        nextPos = AdvanceMarker(marker, InitialPostPositionByte, pb);
247    }
248    else {
249        nextPos = AdvanceMarker(marker, FinalPostPositionByte, pb);
250    }
251    return makeMarker(FinalMatchByte, pb.createAnd(markerVar(nextPos), getNamedCharacterClassStream(name), "m"));
252}
253
254PabloAST * RE_Compiler::getNamedCharacterClassStream(Name * name) {
255    PabloAST * var = name->getCompiled();
256    if (LLVM_LIKELY(var != nullptr)) {
257        return var;
258    }
259    else if (name->getDefinition() != nullptr) {
260        MarkerType m = compile(name->getDefinition(), mPB);
261        assert(markerPos(m) == FinalMatchByte);
262        var = markerVar(m);
263    }
264    else if (name->getType() == Name::Type::UnicodeProperty) {
265        var = mPB.createCall(name->getName());
266    }
267    else {
268        throw std::runtime_error("Unresolved name " + name->getName());
269    }
270
271    var = mPB.createAnd(var, mPB.createNot(UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed));
272    name->setCompiled(var);
273    return var;
274}
275
276
277MarkerType RE_Compiler::process(Seq * seq, MarkerType marker, PabloBlock & pb) {
278    // if-hierarchies are not inserted within unbounded repetitions
279    if (mStarDepth > 0) {
280        for (RE * re : *seq) {
281            marker = process(re, marker, pb);
282        }
283        return marker;
284    }
285    else {
286        return processSeqTail(seq->begin(), seq->end(), 0, marker, pb);
287    }
288}
289
290MarkerType RE_Compiler::processSeqTail(Seq::iterator current, Seq::iterator end, int matchLenSoFar, MarkerType marker, PabloBlock & pb) {
291    if (current == end) return marker;
292    if (matchLenSoFar < IfInsertionGap) {
293        RE * r = *current;
294        marker = process(r, marker, pb);
295        current++;
296        return processSeqTail(current, end, matchLenSoFar + minMatchLength(r), marker, pb);
297    }
298    else {
299        PabloBlock & nested = PabloBlock::Create(pb);
300        MarkerType m1 = processSeqTail(current, end, 0, marker, nested);
301        Assign * m1a = nested.createAssign("m", markerVar(m1));
302        pb.createIf(markerVar(marker), {m1a}, nested);
303        return makeMarker(m1.pos, m1a);
304    }
305}
306
307MarkerType RE_Compiler::process(Alt * alt, MarkerType marker, PabloBlock & pb) {
308    std::vector<PabloAST *>  accum = {pb.createZeroes(), pb.createZeroes(), pb.createZeroes()};
309    MarkerType const base = marker;
310    // The following may be useful to force a common Advance rather than separate
311    // Advances in each alternative.
312    // MarkerType const base = makeMarker(InitialPostPositionByte, postPositionVar(marker, pb), pb);
313    for (RE * re : *alt) {
314        MarkerType rslt = process(re, base, pb);
315        MarkerPosition p = markerPos(rslt);
316        accum[p] = pb.createOr(accum[p], markerVar(rslt), "alt");
317    }
318    if (isa<Zeroes>(accum[InitialPostPositionByte]) && isa<Zeroes>(accum[FinalPostPositionByte])) {
319        return makeMarker(FinalMatchByte, accum[FinalMatchByte]);
320    }
321    PabloAST * combine = pb.createOr(accum[InitialPostPositionByte], pb.createAdvance(accum[FinalMatchByte], 1), "alt");
322    if (isa<Zeroes>(accum[FinalPostPositionByte])) {
323        return makeMarker(InitialPostPositionByte, combine);
324    }
325    combine = pb.createOr(pb.createScanThru(pb.createAnd(mInitial, combine), mNonFinal), accum[FinalPostPositionByte], "alt");
326    return makeMarker(FinalPostPositionByte, combine);
327}
328
329MarkerType RE_Compiler::process(Assertion * a, MarkerType marker, PabloBlock & pb) {
330    RE * asserted = a->getAsserted();
331    if (a->getKind() == Assertion::Kind::Lookbehind) {
332        MarkerType m = marker;
333        MarkerType lookback = compile(asserted, pb);
334        AlignMarkers(m, lookback, pb);
335        PabloAST * lb = markerVar(lookback);
336        if (a->getSense() == Assertion::Sense::Negative) {
337            lb = pb.createNot(lb);
338        }
339        return makeMarker(markerPos(m), pb.createAnd(markerVar(marker), lb, "lookback"));
340    }
341    else if (isUnicodeUnitLength(asserted)) {
342        MarkerType lookahead = compile(asserted, pb);
343        assert(markerPos(lookahead) == FinalMatchByte);
344        PabloAST * la = markerVar(lookahead);
345        if (a->getSense() == Assertion::Sense::Negative) {
346            la = pb.createNot(la);
347        }
348        MarkerType fbyte = AdvanceMarker(marker, FinalPostPositionByte, pb);
349        return makeMarker(FinalPostPositionByte, pb.createAnd(markerVar(fbyte), la, "lookahead"));
350    }
351    else {
352        throw std::runtime_error("Unsupported lookahead assertion.");
353    }
354}
355
356MarkerType RE_Compiler::process(Diff * diff, MarkerType marker, PabloBlock & pb) {
357    RE * lh = diff->getLH();
358    RE * rh = diff->getRH();
359    if (isUnicodeUnitLength(lh) && isUnicodeUnitLength(rh)) {
360        MarkerType t1 = process(lh, marker, pb);
361        MarkerType t2 = process(rh, marker, pb);
362        AlignMarkers(t1, t2, pb);
363        return makeMarker(markerPos(t1), pb.createAnd(markerVar(t1), pb.createNot(markerVar(t2)), "diff"));
364    }
365    throw std::runtime_error("Unsupported Diff operands: " + Printer_RE::PrintRE(diff));
366}
367
368MarkerType RE_Compiler::process(Intersect * x, MarkerType marker, PabloBlock & pb) {
369    RE * lh = x->getLH();
370    RE * rh = x->getRH();
371    if (isUnicodeUnitLength(lh) && isUnicodeUnitLength(rh)) {
372        MarkerType t1 = process(lh, marker, pb);
373        MarkerType t2 = process(rh, marker, pb);
374        AlignMarkers(t1, t2, pb);
375        return makeMarker(markerPos(t1), pb.createAnd(markerVar(t1), markerVar(t2), "intersect"));
376    }
377    throw std::runtime_error("Unsupported Intersect operands: " + Printer_RE::PrintRE(x));
378}
379
380MarkerType RE_Compiler::process(Rep * rep, MarkerType marker, PabloBlock & pb) {
381    int lb = rep->getLB();
382    int ub = rep->getUB();
383    if (lb > 0) {
384        marker = processLowerBound(rep->getRE(), lb, marker, pb);
385    }
386    if (ub == Rep::UNBOUNDED_REP) {
387        return processUnboundedRep(rep->getRE(), marker, pb);
388    }
389    else if (ub == lb) { // if (rep->getUB() != Rep::UNBOUNDED_REP)
390        return marker;
391    }
392    else { // if (rep->getUB() != Rep::UNBOUNDED_REP)
393        return processBoundedRep(rep->getRE(), ub - lb, marker, pb);
394    }
395}
396
397/*
398   Given a stream |repeated| marking positions associated with matches to an item
399   of length |repeated_lgth|, compute a stream marking |repeat_count| consecutive
400   occurrences of such items.
401*/
402
403inline PabloAST * RE_Compiler::consecutive1(PabloAST * repeated, int repeated_lgth, int repeat_count, pablo::PabloBlock & pb) {
404        int i = repeated_lgth;
405        int total_lgth = repeat_count * repeated_lgth;
406        PabloAST * consecutive_i = repeated;
407        while (i * 2 <= total_lgth) {
408            PabloAST * v = consecutive_i;
409            PabloAST * v2 =  pb.createAdvance(v, i);
410            i *= 2;
411            consecutive_i = pb.createAnd(v, v2, "at" + std::to_string(i) + "inarow");
412        }       
413        if (i < total_lgth) {
414            PabloAST * v = consecutive_i;
415            consecutive_i = pb.createAnd(v, pb.createAdvance(v, total_lgth - i), "at" + std::to_string(total_lgth) + "inarow");
416        }
417        return consecutive_i;
418}
419
420inline PabloAST * RE_Compiler::reachable(PabloAST *repeated, int repeated_lgth, int repeat_count, pablo::PabloBlock & pb) {
421        int i = repeated_lgth;
422        int total_lgth = repeat_count * repeated_lgth;
423        if (repeat_count == 0) {
424            return repeated;
425        }
426        PabloAST * reachable_i = pb.createOr(repeated, pb.createAdvance(repeated, 1), "within1");
427        while (i * 2 < total_lgth) {
428            PabloAST * v = reachable_i;
429            PabloAST * v2 =  pb.createAdvance(v, i);
430            i *= 2;
431            reachable_i = pb.createOr(v, v2, "within" + std::to_string(i));
432        }       
433        if (i < total_lgth) {
434            PabloAST * v = reachable_i;
435            reachable_i = pb.createOr(v, pb.createAdvance(v, total_lgth - i), "within" + std::to_string(total_lgth));
436        }
437        return reachable_i;
438}
439
440MarkerType RE_Compiler::processLowerBound(RE * repeated, int lb, MarkerType marker, PabloBlock & pb) {
441    if (isByteLength(repeated) && !DisableLog2BoundedRepetition) {
442        PabloAST * cc = markerVar(compile(repeated, pb));
443        PabloAST * cc_lb = consecutive1(cc, 1, lb, pb);
444        PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), markerPos(marker) == FinalMatchByte ? lb : lb-1);
445        return makeMarker(FinalMatchByte, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
446    }
447    // Fall through to general case.
448    mStarDepth++;
449    while (lb-- != 0) {
450        marker = process(repeated, marker, pb);
451    }
452    mStarDepth--;
453    return marker;
454}
455
456MarkerType RE_Compiler::processBoundedRep(RE * repeated, int ub, MarkerType marker, PabloBlock & pb) {
457    if (isByteLength(repeated) && ub > 1 && !DisableLog2BoundedRepetition) {
458        // log2 upper bound for fixed length (=1) class
459        // Create a mask of positions reachable within ub from current marker.
460        // Use matchstar, then apply filter.
461        PabloAST * match = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));
462        PabloAST * upperLimitMask = reachable(match, 1, ub, pb);
463        PabloAST * cursor = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));
464        PabloAST * rep_class_var = markerVar(compile(repeated, pb));
465        return makeMarker(InitialPostPositionByte, pb.createAnd(pb.createMatchStar(cursor, rep_class_var), upperLimitMask, "bounded"));
466    }
467    // Fall through to general case.
468    mStarDepth++;
469    while (ub-- != 0) {
470        MarkerType a = process(repeated, marker, pb);
471        MarkerType m = marker;
472        AlignMarkers(a, m, pb);
473        marker = makeMarker(markerPos(a), pb.createOr(markerVar(a), markerVar(m), "m"));
474    }
475    mStarDepth--;
476    return marker;
477}
478
479MarkerType RE_Compiler::processUnboundedRep(RE * repeated, MarkerType marker, PabloBlock & pb) {
480    // always use PostPosition markers for unbounded repetition.
481    PabloAST * base = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));
482   
483    if (isByteLength(repeated)  && !DisableMatchStar) {
484        PabloAST * cc = markerVar(compile(repeated, pb)); 
485        return makeMarker(InitialPostPositionByte, pb.createMatchStar(base, cc, "unbounded"));
486    }
487    else if (isUnicodeUnitLength(repeated) && !DisableMatchStar && !DisableUnicodeMatchStar) {
488        PabloAST * cc = markerVar(compile(repeated, pb));
489        return makeMarker(FinalPostPositionByte, pb.createAnd(pb.createMatchStar(base, pb.createOr(mNonFinal, cc)), mFinal, "unbounded"));
490    }
491    else {
492        Assign * whileTest = pb.createAssign("test", base);
493        Assign * whileAccum = pb.createAssign("accum", base);
494
495        PabloBlock & wb = PabloBlock::Create(pb);
496        mStarDepth++;
497
498        PabloAST * loopComputation = markerVar(AdvanceMarker(process(repeated, makeMarker(InitialPostPositionByte, whileTest), wb), InitialPostPositionByte, wb));
499        Next * nextWhileTest = wb.createNext(whileTest, wb.createAnd(loopComputation, wb.createNot(whileAccum)));
500        Next * nextWhileAccum = wb.createNext(whileAccum, wb.createOr(loopComputation, whileAccum));
501
502        pb.createWhile(nextWhileTest, wb);
503        mStarDepth--;
504
505        return makeMarker(InitialPostPositionByte, pb.createAssign("unbounded", nextWhileAccum));
506    }   
507} // end of namespace re
508}
Note: See TracBrowser for help on using the repository browser.