Changeset 3594
 Timestamp:
 Dec 23, 2013, 3:36:03 PM (6 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

proto/RE/doc/CanonicalRE.hs
r3593 r3594 12 12 import Data.Char 13 13 14 data RE = CC String  Seq [RE]  Alt [RE]  Rep RE Int Int  14 data RepLimit = UpperBound Int  Unbounded deriving Show 15 upper_bound_dec (UpperBound j) = UpperBound (j  1) 16 upper_bound_dec Unbounded = Unbounded 17 18 data RE = CC String  Seq [RE]  Alt [RE]  Rep RE Int RepLimit  15 19 Start  End  lookbehind/lookahead assertions for newline 16 20  Rep r i j represents bounded repetition, with lower bound i and … … 27 31 canonicalize (Proto.Join rs) = Seq (map canonicalize rs) 28 32 canonicalize (Proto.Alt rs) = Alt (map canonicalize rs) 29 canonicalize (Proto.Opt r) = Rep (canonicalize r) 0 130 canonicalize (Proto.Kstar r) = Rep (canonicalize r) 0 (1)31 canonicalize (Proto.Kplus r) = Rep (canonicalize r) 1 (1)32 canonicalize (Proto.OrMore i r) = Rep (canonicalize r) i (1)33 canonicalize (Proto.Bounded i j r) = Rep (canonicalize r) i j33 canonicalize (Proto.Opt r) = Rep (canonicalize r) 0 (UpperBound 1) 34 canonicalize (Proto.Kstar r) = Rep (canonicalize r) 0 Unbounded 35 canonicalize (Proto.Kplus r) = Rep (canonicalize r) 1 Unbounded 36 canonicalize (Proto.OrMore i r) = Rep (canonicalize r) i Unbounded 37 canonicalize (Proto.Bounded i j r) = Rep (canonicalize r) i (UpperBound j) 34 38 35 39 … … 63 67  otherwise = Seq ((removeNullablePrefix r):rs) 64 68 removeNullablePrefix (Alt rs) = Alt (map removeNullablePrefix rs) 65 removeNullablePrefix (Rep r i j) 69 removeNullablePrefix (Rep r 0 ub) = Seq [] 70 removeNullablePrefix (Rep r lb ub) 66 71  minMatchLen(r) == 0 = Seq [] 67  i == 0 = Seq [] 68  otherwise = Seq [removeNullablePrefix(r), Rep r (i1) (j1)] 72  otherwise = Seq [removeNullablePrefix(r), Rep r (lb1) (upper_bound_dec ub)] 69 73 70 74 … … 100 104  101 105 re2pablo_helper(Start, m) = 102 (m+1, [SetMarker(m+1, And( m, Not(Advance(Not(CharClass('\n'))))))])106 (m+1, [SetMarker(m+1, And(Marker m, Not(Advance(Not(CharClass("\n"))))))]) 103 107 104 108  To match "$" we must have reached end of line. 105 109  106 110 re2pablo_helper(End, m) = 107 (m+1, [SetMarker(m+1, And( m, CharClass('\n')))])111 (m+1, [SetMarker(m+1, And(Marker m, CharClass("\n")))]) 108 112 109 113  Seq [] is the empty regexp which matches the empty string. … … 128 132  does. 129 133 130 re2pablo_helper(Alt (r1:rs), m) = ( Or(m1, m2), s1 ++ s2)134 re2pablo_helper(Alt (r1:rs), m) = (m2 + 1, s1 ++ s2 ++ [SetMarker (m2 + 1, Or(Marker m1, Marker m2))]) 131 135 where 132 136 (m1, s1) = mre2pablo_helper(r1, m) 133 137 (m2, s2) = mre2pablo_helper(Alt rs, m1) 134 138 135 re2pablo_helper(Rep (CC c) 0 1, m) =139 re2pablo_helper(Rep (CC c) 0 Unbounded, m) = 136 140 (m + 1, [SetMarker(m + 1, MatchStar(Marker(m), CharClass(c)))]) 137 141 138 re2pablo_helper(Rep r 0 j, m) =139 j < 0 == (m1, [SetMarker(m + 1, Marker m), While (Marker (m+1), s1 ++ [SetMarker(m + 1, And(Marker m1 ,Not(Marker (m+1))))] )])140 otherwise == (Or(m1, m2), s1 ++ s2)142 re2pablo_helper(Rep r 0 ub, m) 143  ub < 0 = (m1, [SetMarker(m + 1, Marker m), While (Marker (m+1), s1 ++ [SetMarker(m + 1, And(Marker m1 ,Not(Marker (m+1))))] )]) 144  otherwise = (m + 1, s1 ++ s2 ++ [SetMarker (m + 1, Or(Marker m1, Marker m2))]) 141 145 where 142 146 (m1, s1) = mre2pablo_helper(r, m+1) 143 (m2, s2) = mre2pablo_helper(Rep r 0 ( j1), m1)147 (m2, s2) = mre2pablo_helper(Rep r 0 (upper_bound_dec ub), m1) 144 148 145 re2pablo_helper(Rep r i j, m) = (m2, s1 ++ s2)149 re2pablo_helper(Rep r lb ub, m) = (m2, s1 ++ s2) 146 150 where 147 151 (m1, s1) = mre2pablo_helper(r, m) 148 (m2, s2) = mre2pablo_helper(Rep r ( i1) (j1), m1)152 (m2, s2) = mre2pablo_helper(Rep r (lb  1) (upper_bound_dec ub), m1) 149 153
Note: See TracChangeset
for help on using the changeset viewer.