[3850] | 1 | /* |
---|
[5509] | 2 | * Copyright (c) 2017 International Characters. |
---|
[3850] | 3 | * This software is licensed to the public under the Open Software License 3.0. |
---|
| 4 | * icgrep is a trademark of International Characters. |
---|
| 5 | */ |
---|
| 6 | |
---|
[5267] | 7 | #include "re_rep.h" |
---|
[4971] | 8 | #include "re_assertion.h" |
---|
[4203] | 9 | #include "re_seq.h" |
---|
[5509] | 10 | #include "re_alt.h" |
---|
| 11 | #include "re_nullable.h" |
---|
[5267] | 12 | #include <llvm/Support/Casting.h> |
---|
[5646] | 13 | #include <llvm/Support/ErrorHandling.h> |
---|
[4203] | 14 | |
---|
[5267] | 15 | using namespace llvm; |
---|
| 16 | |
---|
[4203] | 17 | namespace re { |
---|
| 18 | |
---|
| 19 | inline int ubCombine(const int h1, const int h2) { |
---|
| 20 | if ((h1 == Rep::UNBOUNDED_REP) || (h2 == Rep::UNBOUNDED_REP)) { |
---|
| 21 | return Rep::UNBOUNDED_REP; |
---|
| 22 | } |
---|
| 23 | else { |
---|
| 24 | return h1 * h2; |
---|
| 25 | } |
---|
| 26 | } |
---|
[5509] | 27 | |
---|
| 28 | RE * makeRep(RE * re, int lb, const int ub) { |
---|
[5646] | 29 | if (LLVM_UNLIKELY(lb == Rep::UNBOUNDED_REP)) { |
---|
| 30 | report_fatal_error("repetition lower bound must be finite!"); |
---|
| 31 | } |
---|
| 32 | if (LLVM_UNLIKELY(ub != Rep::UNBOUNDED_REP && ub < lb)) { |
---|
| 33 | report_fatal_error("lower bound cannot exceed upper bound"); |
---|
| 34 | } |
---|
[5509] | 35 | if (RE_Nullable::isNullable(re)) { |
---|
| 36 | if (ub == 1) { |
---|
| 37 | return re; |
---|
| 38 | } |
---|
| 39 | lb = 0; |
---|
[5646] | 40 | } |
---|
[4203] | 41 | if (Rep * rep = dyn_cast<Rep>(re)) { |
---|
[5509] | 42 | int l = rep->getLB(); |
---|
| 43 | int u = rep->getUB(); |
---|
[5515] | 44 | if (lb == ub) { |
---|
| 45 | return new Rep(rep->getRE(), l * lb, ubCombine(u, ub)); |
---|
| 46 | } |
---|
| 47 | else if (u == Rep::UNBOUNDED_REP) { |
---|
[5509] | 48 | if (l == 0) { |
---|
| 49 | /* R{0,}{lb, ub} = R{0,} */ |
---|
| 50 | return rep; |
---|
| 51 | } else if (l == 1) { |
---|
| 52 | /* R{1,}{lb, ub} = R{lb,} */ |
---|
| 53 | return new Rep(rep->getRE(), lb, Rep::UNBOUNDED_REP); |
---|
| 54 | } else if (lb == 0) { |
---|
| 55 | /* R{l,}{0, ub} = R{l,}? */ |
---|
| 56 | return new Rep(rep, 0, 1); |
---|
| 57 | } else { |
---|
| 58 | /* R{l,}{lb, ub} = R{l * lb,} */ |
---|
| 59 | return new Rep(rep->getRE(), l * lb, Rep::UNBOUNDED_REP); |
---|
| 60 | } |
---|
[4203] | 61 | } |
---|
[5509] | 62 | else if (u > l) { |
---|
| 63 | // Calculate the smallest number of repetitions n such that n * u + 1 >= (n + 1) * l |
---|
| 64 | int n = (u - 2)/(u-l); |
---|
| 65 | if (lb >= n) { |
---|
| 66 | return new Rep(rep->getRE(), l * lb, ubCombine(u, ub)); |
---|
| 67 | } |
---|
| 68 | if ((ub == Rep::UNBOUNDED_REP) || (ub >= n)) { |
---|
| 69 | RE * r1 = new Rep(rep->getRE(), n * l, ubCombine(u, ub)); |
---|
[5515] | 70 | RE * r2 = makeRep(rep, lb, n - 1); // makeRep recursive simplifies. |
---|
[5509] | 71 | return makeAlt({r1, r2}); |
---|
| 72 | } |
---|
[4203] | 73 | } |
---|
| 74 | } |
---|
[4971] | 75 | else if (isa<Assertion>(re)) { |
---|
| 76 | if (lb > 0) return re; |
---|
| 77 | else return makeSeq(); |
---|
| 78 | } |
---|
[4203] | 79 | else { |
---|
| 80 | if (Seq * seq = dyn_cast<Seq>(re)) { |
---|
| 81 | if (seq->empty()) { |
---|
| 82 | return seq; |
---|
| 83 | } |
---|
| 84 | } |
---|
| 85 | if ((lb == 0) && (ub == 0)) { |
---|
| 86 | return makeSeq(); |
---|
| 87 | } |
---|
| 88 | else if ((lb == 1) && (ub == 1)) { |
---|
| 89 | return re; |
---|
| 90 | } |
---|
| 91 | } |
---|
| 92 | return new Rep(re, lb, ub); |
---|
| 93 | } |
---|
| 94 | |
---|
[5725] | 95 | RE * unrollFirst(Rep * rep) { |
---|
| 96 | RE * e = rep->getRE(); |
---|
| 97 | auto lb = rep->getLB(); |
---|
| 98 | auto ub = rep->getUB(); |
---|
| 99 | if (ub == 0) return makeAlt(); // Can't unroll - return unmatchable regexp. |
---|
| 100 | // Unroll one copy of the loop and simplify. |
---|
| 101 | RE * reduced = makeRep(e, lb == 0 ? lb : lb - 1, ub == Rep::UNBOUNDED_REP ? ub : ub - 1); |
---|
| 102 | RE * unrolled = makeSeq({e, reduced}); |
---|
| 103 | if (lb == 0) return makeAlt({makeSeq(), unrolled}); |
---|
| 104 | else return unrolled; |
---|
[4203] | 105 | } |
---|
[5725] | 106 | RE * unrollLast(Rep * rep) { |
---|
| 107 | RE * e = rep->getRE(); |
---|
| 108 | auto lb = rep->getLB(); |
---|
| 109 | auto ub = rep->getUB(); |
---|
| 110 | if (ub == 0) return makeAlt(); // Can't unroll - return unmatchable regexp. |
---|
| 111 | // Unroll one copy of the loop and simplify. |
---|
| 112 | RE * reduced = makeRep(e, lb == 0 ? lb : lb - 1, ub == Rep::UNBOUNDED_REP ? ub : ub - 1); |
---|
| 113 | RE * unrolled = makeSeq({reduced, e}); |
---|
| 114 | if (lb == 0) return makeAlt({makeSeq(), unrolled}); |
---|
| 115 | else return unrolled; |
---|
| 116 | } |
---|
| 117 | } |
---|