Changeset 3593 for proto


Ignore:
Timestamp:
Dec 23, 2013, 9:44:54 AM (6 years ago)
Author:
cameron
Message:

Initial match prototype on canonical rep

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/doc/CanonicalRE.hs

    r3590 r3593  
    6868   | otherwise            = Seq [removeNullablePrefix(r), Rep r (i-1) (j-1)]
    6969
     70
     71--
     72-- Regular Expression Compiler
     73
     74data PabloE = Marker(Int) | And(PabloE, PabloE) | Or(PabloE, PabloE) | Not(PabloE)
     75              | CharClass(String) | Advance(PabloE) | MatchStar(PabloE, PabloE)
     76   deriving Show
     77
     78data PabloS = SetMarker(Int, PabloE) | ReturnMarker(Int)
     79              | If (PabloE, [PabloS])| While (PabloE, [PabloS])
     80   deriving Show
     81
     82compile :: RE -> [PabloS]
     83
     84compile(e) = stmts ++ [ReturnMarker m]
     85  where (m, stmts) = re2pablo_helper(e, 0)
     86
     87re2pablo_helper :: (RE, Int) -> (Int, [PabloS])
     88
     89-- To match a character class, use bitwise and of the marker and
     90-- character class, then advance 1.  The output marker variable
     91-- is the next one in sequence.
     92
     93mre2pablo_helper(CC(c), m) =
     94   (m + 1, [SetMarker (m + 1, Advance(And(Marker(m), CharClass(c))))])
     95
     96-- To match "^" we must be at the start of line, i.e., one past
     97-- a newline character or at the beginning of file.  We use the
     98-- trick of advancing the negated newline stream to determine these
     99-- positions.
     100--
     101re2pablo_helper(Start, m) =
     102  (m+1, [SetMarker(m+1, And(m, Not(Advance(Not(CharClass('\n'))))))])
     103
     104-- To match "$" we must have reached end of line.
     105--
     106re2pablo_helper(End, m) =
     107  (m+1, [SetMarker(m+1, And(m, CharClass('\n')))])
     108
     109-- Seq [] is the empty regexp which matches the empty string. 
     110-- We just return the current marker unchanged, with no statements
     111-- needed to compute it.
     112--
     113re2pablo_helper(Seq [], m) = (m, [])
     114
     115-- Seq (r1:rs): match r1, the first subexpression, then match
     116-- the rest, concatenating the statements.
     117re2pablo_helper(Seq (r1:rs), m) = (m2, s1 ++ s2)
     118  where
     119   (m1, s1) = mre2pablo_helper(r1, m)
     120   (m2, s2) = mre2pablo_helper(Seq rs, m1)
     121
     122-- Alt[r] has a single alternative r to match, just match
     123-- it with no other possibility.
     124re2pablo_helper(Alt [r], m) = re2pablo_helper(r, m)
     125
     126-- Alt (r1:rs): we have r1 and some other alternatives, the
     127-- match succeeds if either r1 matches or any of the others
     128-- does.
     129
     130re2pablo_helper(Alt (r1:rs), m) = (Or(m1, m2), s1 ++ s2)
     131  where
     132   (m1, s1) = mre2pablo_helper(r1, m)
     133   (m2, s2) = mre2pablo_helper(Alt rs, m1)
     134
     135re2pablo_helper(Rep (CC c) 0 -1, m) =
     136   (m + 1, [SetMarker(m + 1, MatchStar(Marker(m), CharClass(c)))])
     137
     138re2pablo_helper(Rep r 0 j, m) =
     139  j < 0 == (m1, [SetMarker(m + 1, Marker m), While (Marker (m+1), s1 ++ [SetMarker(m + 1, And(Marker m1 ,Not(Marker (m+1))))] )])
     140  otherwise == (Or(m1, m2), s1 ++ s2)
     141  where
     142   (m1, s1) = mre2pablo_helper(r, m+1)
     143   (m2, s2) = mre2pablo_helper(Rep r 0 (j-1), m1)
     144
     145re2pablo_helper(Rep r i j, m) =  (m2, s1 ++ s2)
     146  where
     147   (m1, s1) = mre2pablo_helper(r, m)
     148   (m2, s2) = mre2pablo_helper(Rep r (i-1) (j-1), m1)
     149
Note: See TracChangeset for help on using the changeset viewer.