Changeset 3857 for proto/RE


Ignore:
Timestamp:
Jun 4, 2014, 6:05:46 PM (5 years ago)
Author:
cameron
Message:

Bounded repetition vs Unbounded repetition clarity

Location:
proto/RE/Haskell
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/Haskell/CanonicalRE.hs

    r3856 r3857  
    1616
    1717--   unbounded repetition is represented by -1
     18unboundedRep :: Int
    1819unboundedRep = -1
    1920
  • proto/RE/Haskell/REcompile.hs

    r3856 r3857  
    1 -- Module PabloScompile is a research compiler for prototyping
     1-- Module REcompile is a research compiler for prototyping
    22-- algorithms and ideas in regular expression processing using
    33-- parallel bit streams and MatchStar.
     
    99-- Robert D. Cameron, 2014
    1010
    11 module PabloScompile (s2b) where
     11module REcompile (compile, searchRE, searchRegExp) where
    1212       
    1313import Data.Char
     14import SparseCharSet
     15import CanonicalRE
     16import REparse
    1417import RunPablo
    15 
    16 data PabloIR = IRand(BlockVar, BlockVar, BlockVar),
    17 
    18 
    19 Assign(String, PabloE) |  If (PabloE, [PabloS])| While (PabloE, [PabloS])
    20    deriving Show
    21 
    2218
    2319--
     
    6763seq_helper :: ([RE], CodeGenState) -> CodeGenState
    6864alt_helper :: ([RE], CodeGenState) -> CodeGenState
    69 rep_helper :: (RE, Int, RepLimit, CodeGenState) -> CodeGenState
     65unbounded_rep_helper :: (RE, Int, CodeGenState) -> CodeGenState
     66bounded_rep_helper :: (RE, Int, Int, CodeGenState) -> CodeGenState
     67
     68eol_CC = CharClass([CharRange(ord '\n', ord '\n')])
    7069
    7170compile(re) = re2pablo_helper(re, (env, [Assign(marker, All(1))], marker))
     
    7978   where
    8079     (newenv, newsym) = gensym(env, "marker")
    81      new_stmt = Assign (newsym, Advance(And(Var(last_marker), CharClass(c))))
     80     new_stmt = Assign (newsym, Advance(And(Var(last_marker), CharClass(c)), 1))
    8281
    8382-- To match "^" we must be at the start of line, i.e., one past
     
    8988   where
    9089     (newenv, newsym) = gensym(env, "start_of_line_marker")
    91      new_stmt = Assign (newsym, And(Var(last_marker), Not(Advance(Not(CharClass("\n"))))))
     90     new_stmt = Assign (newsym, And(Var(last_marker), Not(Advance(Not(eol_CC), 1))))
    9291
    9392-- To match "$" we must have reached end of line.
     
    9695   where
    9796     (newenv, newsym) = gensym(env, "end_of_line_marker")
    98      new_stmt = Assign (newsym, And(Var(last_marker), CharClass("\n")))
     97     new_stmt = Assign (newsym, And(Var(last_marker), eol_CC))
    9998
    10099-- For the structured types (Seq, Alt, Rep), just call the specific helper.
     
    102101re2pablo_helper(Seq rs, cg_state) = seq_helper(rs, cg_state)
    103102re2pablo_helper(Alt rs, cg_state) = alt_helper(rs, cg_state)
    104 re2pablo_helper(Rep (r, lb, ub), cg_state) = rep_helper(r, lb, ub, cg_state)
     103re2pablo_helper(Rep (r, lb, ub), cg_state)
     104  | ub == unboundedRep  = unbounded_rep_helper(r, lb, cg_state)
     105  | otherwise           = bounded_rep_helper(r, lb, ub, cg_state)
    105106
    106107
     
    133134   (e2, s2, alt2_marker) = alt_helper(rs, (e1, s1, last_marker))
    134135   (newenv, newsym) = gensym(e2, "alt_marker")
    135    new_stmts = s2 ++ [Assign (newsym, Or(Var(alt1_marker), Var(alt2_marker)))]
     136   new_stmts = s2 ++ [Assign (newsym, Or(Var(alt1_marker), Var(alt2_marker)))] 
    136137
    137138-- For Kleene Star character class repetition, use MatchStar
    138 rep_helper(CC c, 0, unboundedRep, (env, stmts, last_marker)) = (newenv, stmts ++ [new_stmt], newsym)
     139unbounded_rep_helper(CC c, 0, cg_state) = (newenv, stmts ++ [new_stmt], newsym)
    139140   where
     141     (env, stmts, last_marker) = cg_state
    140142     (newenv, newsym) = gensym(env, "marker")
    141143     new_stmt = Assign (newsym, MatchStar(Var(last_marker), CharClass(c)))
    142144
    143145-- For general Kleene Star, we need a while loop.
    144 rep_helper(r, 0, unboundedRep, (env, stmts, last_marker)) = (newenv, stmts ++ while_init ++ [loop_stmt], while_accum)
    145   where    (e1, while_test) = gensym(env, "while_test")
     146unbounded_rep_helper(r, 0, cg_state) = (newenv, stmts ++ while_init ++ [loop_stmt], while_accum)
     147  where
     148    (env, stmts, last_marker) = cg_state
     149    (e1, while_test) = gensym(env, "while_test")
    146150    (e2, while_accum) = gensym(e1, "while_accum")
    147151    (newenv, loop_body_stmts, loop_result) = re2pablo_helper(r, (e2, [], while_test))
     
    151155    loop_stmt = While(Var(while_test), loop_body_stmts ++ repeat)
    152156
    153 rep_helper(r, lb, unboundedRep, cg_state) = rep_helper(r, lb-1, Unbounded, cg1_state)
     157unbounded_rep_helper(r, lb, cg_state) = unbounded_rep_helper(r, lb-1, cg1_state)
    154158  where
    155159   cg1_state = re2pablo_helper(r, cg_state)
     
    157161-- Now Bounded Repetition: use multiple copies
    158162   
    159 rep_helper(r, 0, 0, cg_state) = cg_state
     163bounded_rep_helper(r, 0, 0, cg_state) = cg_state
    160164
    161 rep_helper(r, 0, ub, (env, stmts, last_marker)) = (newenv, new_stmts, newsym)
     165bounded_rep_helper(r, 0, ub, cg_state) = (newenv, new_stmts, newsym)
    162166  where
     167   (env, stmts, last_marker) = cg_state
    163168   (e1, s1, rep1_marker) = re2pablo_helper(r, (env, stmts, last_marker))
    164    (e2, s2, rep2plus_marker) = rep_helper(r, 0, ub-1, (e1, s1, rep1_marker))
     169   (e2, s2, rep2plus_marker) = bounded_rep_helper(r, 0, ub-1, (e1, s1, rep1_marker))
    165170   (newenv, newsym) = gensym(e2, "alt_marker")
    166171   new_stmts = s2 ++ [Assign (newsym, Or(Var(last_marker), Var(rep2plus_marker)))]
    167172
    168 rep_helper(r, lb, ub, cg_state) = rep_helper(r, lb-1, ub-1, cg1_state)
     173bounded_rep_helper(r, lb, ub, cg_state) = bounded_rep_helper(r, lb-1, ub-1, cg1_state)
    169174  where
    170175   cg1_state = re2pablo_helper(r, cg_state)
Note: See TracChangeset for help on using the changeset viewer.