Changeset 3595
 Timestamp:
 Dec 24, 2013, 7:21:38 AM (6 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

proto/RE/doc/CanonicalRE.hs
r3594 r3595 12 12 import Data.Char 13 13 14 data RepLimit = UpperBound Int  Unbounded deriving Show 14 data RepLimit = UpperBound Int  UnBounded deriving Show 15 upper_bound_dec :: RepLimit > RepLimit 15 16 upper_bound_dec (UpperBound j) = UpperBound (j  1) 16 upper_bound_dec Un bounded = Unbounded17 upper_bound_dec UnBounded = UnBounded 17 18 18 19 data RE = CC String  Seq [RE]  Alt [RE]  Rep RE Int RepLimit  19 20 Start  End  lookbehind/lookahead assertions for newline 20  Rep r i j represents bounded repetition, with lower bound i and21  Rep r i j represents UpperBound repetition, with lower bound i and 21 22  upper bound j. We use the convention that any negative value of 22  j represents unbounded repetition.23  j represents UnBounded repetition. 23 24 24 25 canonicalize :: Proto.RE > RE … … 32 33 canonicalize (Proto.Alt rs) = Alt (map canonicalize rs) 33 34 canonicalize (Proto.Opt r) = Rep (canonicalize r) 0 (UpperBound 1) 34 canonicalize (Proto.Kstar r) = Rep (canonicalize r) 0 Un bounded35 canonicalize (Proto.Kplus r) = Rep (canonicalize r) 1 Un bounded36 canonicalize (Proto.OrMore i r) = Rep (canonicalize r) i Un bounded35 canonicalize (Proto.Kstar r) = Rep (canonicalize r) 0 UnBounded 36 canonicalize (Proto.Kplus r) = Rep (canonicalize r) 1 UnBounded 37 canonicalize (Proto.OrMore i r) = Rep (canonicalize r) i UnBounded 37 38 canonicalize (Proto.Bounded i j r) = Rep (canonicalize r) i (UpperBound j) 38 39 … … 95 96  is the next one in sequence. 96 97 97 mre2pablo_helper(CC(c), m) =98 re2pablo_helper(CC(c), m) = 98 99 (m + 1, [SetMarker (m + 1, Advance(And(Marker(m), CharClass(c))))]) 99 100 … … 121 122 re2pablo_helper(Seq (r1:rs), m) = (m2, s1 ++ s2) 122 123 where 123 (m1, s1) = mre2pablo_helper(r1, m)124 (m2, s2) = mre2pablo_helper(Seq rs, m1)124 (m1, s1) = re2pablo_helper(r1, m) 125 (m2, s2) = re2pablo_helper(Seq rs, m1) 125 126 126 127  Alt[r] has a single alternative r to match, just match … … 132 133  does. 133 134 134 re2pablo_helper(Alt (r1:rs), m) = (m2 + 1, s 1 ++ s2 ++ [SetMarker (m2 + 1, Or(Marker m1, Marker m2))])135 re2pablo_helper(Alt (r1:rs), m) = (m2 + 1, save ++ s1 ++ s2 ++ final) 135 136 where 136 (m1, s1) = mre2pablo_helper(r1, m) 137 (m2, s2) = mre2pablo_helper(Alt rs, m1) 137 (m1, s1) = re2pablo_helper(r1, m) 138 (m2, s2) = re2pablo_helper(Alt rs, m1+1) 139 save = [SetMarker(m1 + 1, Marker(m))] 140 final = [SetMarker(m2 + 1, Or(Marker(m1), Marker(m2)))] 138 141 139 re2pablo_helper(Rep (CC c) 0 Un bounded, m) =142 re2pablo_helper(Rep (CC c) 0 UnBounded, m) = 140 143 (m + 1, [SetMarker(m + 1, MatchStar(Marker(m), CharClass(c)))]) 141 144 142 re2pablo_helper(Rep r 0 ub, m) 143  ub < 0 = (m1, [SetMarker(m + 1, Marker m), While (Marker (m+1), s1 ++ [SetMarker(m + 1, And(Marker m1 ,Not(Marker (m+1))))] )]) 144  otherwise = (m + 1, s1 ++ s2 ++ [SetMarker (m + 1, Or(Marker m1, Marker m2))]) 145 re2pablo_helper(Rep r 0 UnBounded, m) = 146 (m1+1, save ++ [While (Marker(m+1), s1 ++ loopfinal)]) 145 147 where 146 (m1, s1) = mre2pablo_helper(r, m+1) 147 (m2, s2) = mre2pablo_helper(Rep r 0 (upper_bound_dec ub), m1) 148 (m1, s1) = re2pablo_helper(r, m+1) 149 save = [SetMarker(m+1, Marker(m)), SetMarker(m1+1, Marker(m))] 150 loopfinal = [SetMarker(m+1, And(Marker(m1), Not(Marker(m1+1)))), SetMarker(m1+1, Or(Marker(m1+1), Marker(m1)))] 151 152 re2pablo_helper(Rep r 0 (UpperBound 0), m) = (m, []) 148 153 149 re2pablo_helper(Rep r lb ub, m) = (m2, s1 ++ s2) 154 re2pablo_helper(Rep r 0 (UpperBound ub), m) = 155 (m2 + 1, s1 ++ s2 ++ [SetMarker (m2 + 1, Or(Marker m1, Marker m2))]) 150 156 where 151 (m1, s1) = mre2pablo_helper(r, m)152 (m2, s2) = mre2pablo_helper(Rep r (lb  1) (upper_bound_dec ub), m1)157 (m1, s1) = re2pablo_helper(r, m) 158 (m2, s2) = re2pablo_helper(Rep r 0 (UpperBound(ub1)), m1) 153 159 160 re2pablo_helper(Rep r lb (UpperBound ub), m) = (m2, s1 ++ s2) 161 where 162 (m1, s1) = re2pablo_helper(r, m) 163 (m2, s2) = re2pablo_helper(Rep r (lb1) (UpperBound(ub1)), m1) 164
Note: See TracChangeset
for help on using the changeset viewer.