 Timestamp:
 Dec 23, 2013, 9:44:54 AM (6 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

proto/RE/doc/CanonicalRE.hs
r3590 r3593 68 68  otherwise = Seq [removeNullablePrefix(r), Rep r (i1) (j1)] 69 69 70 71  72  Regular Expression Compiler 73 74 data PabloE = Marker(Int)  And(PabloE, PabloE)  Or(PabloE, PabloE)  Not(PabloE) 75  CharClass(String)  Advance(PabloE)  MatchStar(PabloE, PabloE) 76 deriving Show 77 78 data PabloS = SetMarker(Int, PabloE)  ReturnMarker(Int) 79  If (PabloE, [PabloS]) While (PabloE, [PabloS]) 80 deriving Show 81 82 compile :: RE > [PabloS] 83 84 compile(e) = stmts ++ [ReturnMarker m] 85 where (m, stmts) = re2pablo_helper(e, 0) 86 87 re2pablo_helper :: (RE, Int) > (Int, [PabloS]) 88 89  To match a character class, use bitwise and of the marker and 90  character class, then advance 1. The output marker variable 91  is the next one in sequence. 92 93 mre2pablo_helper(CC(c), m) = 94 (m + 1, [SetMarker (m + 1, Advance(And(Marker(m), CharClass(c))))]) 95 96  To match "^" we must be at the start of line, i.e., one past 97  a newline character or at the beginning of file. We use the 98  trick of advancing the negated newline stream to determine these 99  positions. 100  101 re2pablo_helper(Start, m) = 102 (m+1, [SetMarker(m+1, And(m, Not(Advance(Not(CharClass('\n'))))))]) 103 104  To match "$" we must have reached end of line. 105  106 re2pablo_helper(End, m) = 107 (m+1, [SetMarker(m+1, And(m, CharClass('\n')))]) 108 109  Seq [] is the empty regexp which matches the empty string. 110  We just return the current marker unchanged, with no statements 111  needed to compute it. 112  113 re2pablo_helper(Seq [], m) = (m, []) 114 115  Seq (r1:rs): match r1, the first subexpression, then match 116  the rest, concatenating the statements. 117 re2pablo_helper(Seq (r1:rs), m) = (m2, s1 ++ s2) 118 where 119 (m1, s1) = mre2pablo_helper(r1, m) 120 (m2, s2) = mre2pablo_helper(Seq rs, m1) 121 122  Alt[r] has a single alternative r to match, just match 123  it with no other possibility. 124 re2pablo_helper(Alt [r], m) = re2pablo_helper(r, m) 125 126  Alt (r1:rs): we have r1 and some other alternatives, the 127  match succeeds if either r1 matches or any of the others 128  does. 129 130 re2pablo_helper(Alt (r1:rs), m) = (Or(m1, m2), s1 ++ s2) 131 where 132 (m1, s1) = mre2pablo_helper(r1, m) 133 (m2, s2) = mre2pablo_helper(Alt rs, m1) 134 135 re2pablo_helper(Rep (CC c) 0 1, m) = 136 (m + 1, [SetMarker(m + 1, MatchStar(Marker(m), CharClass(c)))]) 137 138 re2pablo_helper(Rep r 0 j, m) = 139 j < 0 == (m1, [SetMarker(m + 1, Marker m), While (Marker (m+1), s1 ++ [SetMarker(m + 1, And(Marker m1 ,Not(Marker (m+1))))] )]) 140 otherwise == (Or(m1, m2), s1 ++ s2) 141 where 142 (m1, s1) = mre2pablo_helper(r, m+1) 143 (m2, s2) = mre2pablo_helper(Rep r 0 (j1), m1) 144 145 re2pablo_helper(Rep r i j, m) = (m2, s1 ++ s2) 146 where 147 (m1, s1) = mre2pablo_helper(r, m) 148 (m2, s2) = mre2pablo_helper(Rep r (i1) (j1), m1) 149
Note: See TracChangeset
for help on using the changeset viewer.