Changeset 3856 for proto/RE/Haskell


Ignore:
Timestamp:
Jun 4, 2014, 4:22:30 PM (5 years ago)
Author:
cameron
Message:

Simplify to use -1 as upperbound

Location:
proto/RE/Haskell
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/Haskell/CanonicalRE.hs

    r3827 r3856  
    66-- Robert D. Cameron, 2013
    77
    8 module CanonicalRE (RE(..), RepLimit(..)) where
     8module CanonicalRE (RE(..), unboundedRep) where
    99
    1010import SparseCharSet
     
    1212-- RE is the data type for regular expressions
    1313
    14 data RE = CC SparseCharClass | Start | End | Seq [RE] | Alt [RE] | Rep (RE, Int, RepLimit)
     14data RE = CC SparseCharClass | Start | End | Seq [RE] | Alt [RE] | Rep (RE, Int, Int)
    1515          deriving Show
    16 data RepLimit = UpperBound Int | Unbounded deriving Show
     16
     17--   unbounded repetition is represented by -1
     18unboundedRep = -1
     19
    1720
    1821-- In the following comments, CC String is used instead of CC SparseCharClass for
     
    2427-- Seq [CC "abcd", CC "e", CC "f", CC "ghkl"] represents the regexp [a-d]ef[ghkl]
    2528-- Alt [Seq[CC "r", CC "e", CC "d"],  Seq[CC "b", CC "l", CC "u", CC "e"]] represents red|blue
    26 -- Rep (CC "a", 0, UpperBound 1) represents a?
    27 -- Rep (Seq[CC "a", CC "b", CC "c"], 0, Unbounded)  represents (abc)*, (without substring capture)
    28 -- Rep (CC "abcedefghijklmnopqrstuvwxyz", 1, Unbounded) represents [a-z]+
    29 -- Rep (CC "ab", 5, Unbounded) represents [ab]{5,}
    30 -- Rep (CC "ha", 5, UpperBound 10) represents [ha]{5,10}
     29-- Rep (r, lb, ub) represents repetition of at least lb and at most ub occurrences of r
     30-- Rep (CC "a", 0, 1) represents a?
     31-- Rep (Seq[CC "a", CC "b", CC "c"], 0, unboundedRep)  represents (abc)*, (without substring capture)
     32-- Rep (CC "abcedefghijklmnopqrstuvwxyz", 1, unboundedRep) represents [a-z]+
     33-- Rep (CC "ab", 5, unboundedRep) represents [ab]{5,}
     34-- Rep (CC "ha", 5, 10) represents [ha]{5,10}
    3135
    3236-- Special cases:
  • proto/RE/Haskell/Nullable.hs

    r3600 r3856  
    2424--- removeNullablePrefix takes a regular expression and returns
    2525--- the version of it that has been transformed to remove any nullable
    26 --- prefixeProto.  A prefix p is nullable if it can possibly match the empty
     26--- prefix.  A prefix p is nullable if it can possibly match the empty
    2727--- string, i.e., minMatchLen(p) = 0.  For example, in the regexp
    2828--- [a-z]*-[0-9]+ the prefix [a-z]* is nullable.
     
    4040removeNullablePrefix (Rep (r, lb, ub))
    4141   | minMatchLen(r) == 0  = Seq []
    42    | otherwise            = Seq [removeNullablePrefix(r), Rep (r, lb-1, upper_bound_dec ub)]
    43 
    44 upper_bound_dec :: RepLimit -> RepLimit
    45 upper_bound_dec (UpperBound u) = UpperBound (u - 1)
    46 upper_bound_dec Unbounded = Unbounded
     42   | otherwise            = Seq [removeNullablePrefix(r), Rep (r, lb-1, lb-1)]
    4743
    4844
  • proto/RE/Haskell/REcompile.hs

    r3855 r3856  
    1 -- Module REcompile is a research compiler for prototyping
     1-- Module PabloScompile is a research compiler for prototyping
    22-- algorithms and ideas in regular expression processing using
    33-- parallel bit streams and MatchStar.
     
    99-- Robert D. Cameron, 2014
    1010
    11 module REcompile (compile, searchRE, searchRegExp) where
     11module PabloScompile (s2b) where
    1212       
    1313import Data.Char
    14 import SparseCharSet
    15 import CanonicalRE
    16 import REparse
    1714import RunPablo
     15
     16data PabloIR = IRand(BlockVar, BlockVar, BlockVar),
     17
     18
     19Assign(String, PabloE) |  If (PabloE, [PabloS])| While (PabloE, [PabloS])
     20   deriving Show
     21
    1822
    1923--
     
    6569rep_helper :: (RE, Int, RepLimit, CodeGenState) -> CodeGenState
    6670
    67 eol_CC = CharClass([CharRange(ord '\n', ord '\n')])
    68 
    6971compile(re) = re2pablo_helper(re, (env, [Assign(marker, All(1))], marker))
    7072  where
     
    7779   where
    7880     (newenv, newsym) = gensym(env, "marker")
    79      new_stmt = Assign (newsym, Advance(And(Var(last_marker), CharClass(c)), 1))
     81     new_stmt = Assign (newsym, Advance(And(Var(last_marker), CharClass(c))))
    8082
    8183-- To match "^" we must be at the start of line, i.e., one past
     
    8789   where
    8890     (newenv, newsym) = gensym(env, "start_of_line_marker")
    89      new_stmt = Assign (newsym, And(Var(last_marker), Not(Advance(Not(eol_CC), 1))))
     91     new_stmt = Assign (newsym, And(Var(last_marker), Not(Advance(Not(CharClass("\n"))))))
    9092
    9193-- To match "$" we must have reached end of line.
     
    9496   where
    9597     (newenv, newsym) = gensym(env, "end_of_line_marker")
    96      new_stmt = Assign (newsym, And(Var(last_marker), eol_CC))
     98     new_stmt = Assign (newsym, And(Var(last_marker), CharClass("\n")))
    9799
    98100-- For the structured types (Seq, Alt, Rep), just call the specific helper.
     
    134136
    135137-- For Kleene Star character class repetition, use MatchStar
    136 rep_helper(CC c, 0, Unbounded, (env, stmts, last_marker)) = (newenv, stmts ++ [new_stmt], newsym)
     138rep_helper(CC c, 0, unboundedRep, (env, stmts, last_marker)) = (newenv, stmts ++ [new_stmt], newsym)
    137139   where
    138140     (newenv, newsym) = gensym(env, "marker")
     
    140142
    141143-- For general Kleene Star, we need a while loop.
    142 rep_helper(r, 0, Unbounded, (env, stmts, last_marker)) = (newenv, stmts ++ while_init ++ [loop_stmt], while_accum)
    143   where
    144     (e1, while_test) = gensym(env, "while_test")
     144rep_helper(r, 0, unboundedRep, (env, stmts, last_marker)) = (newenv, stmts ++ while_init ++ [loop_stmt], while_accum)
     145  where    (e1, while_test) = gensym(env, "while_test")
    145146    (e2, while_accum) = gensym(e1, "while_accum")
    146147    (newenv, loop_body_stmts, loop_result) = re2pablo_helper(r, (e2, [], while_test))
     
    150151    loop_stmt = While(Var(while_test), loop_body_stmts ++ repeat)
    151152
    152 rep_helper(r, lb, Unbounded, cg_state) = rep_helper(r, lb-1, Unbounded, cg1_state)
     153rep_helper(r, lb, unboundedRep, cg_state) = rep_helper(r, lb-1, Unbounded, cg1_state)
    153154  where
    154155   cg1_state = re2pablo_helper(r, cg_state)
     
    156157-- Now Bounded Repetition: use multiple copies
    157158   
    158 rep_helper(r, 0, UpperBound 0, cg_state) = cg_state
     159rep_helper(r, 0, 0, cg_state) = cg_state
    159160
    160 rep_helper(r, 0, UpperBound ub, (env, stmts, last_marker)) = (newenv, new_stmts, newsym)
     161rep_helper(r, 0, ub, (env, stmts, last_marker)) = (newenv, new_stmts, newsym)
    161162  where
    162163   (e1, s1, rep1_marker) = re2pablo_helper(r, (env, stmts, last_marker))
    163    (e2, s2, rep2plus_marker) = rep_helper(r, 0, UpperBound(ub-1), (e1, s1, rep1_marker))
     164   (e2, s2, rep2plus_marker) = rep_helper(r, 0, ub-1, (e1, s1, rep1_marker))
    164165   (newenv, newsym) = gensym(e2, "alt_marker")
    165166   new_stmts = s2 ++ [Assign (newsym, Or(Var(last_marker), Var(rep2plus_marker)))]
    166167
    167 rep_helper(r, lb, UpperBound ub, cg_state) = rep_helper(r, lb-1, UpperBound(ub-1), cg1_state)
     168rep_helper(r, lb, ub, cg_state) = rep_helper(r, lb-1, ub-1, cg1_state)
    168169  where
    169170   cg1_state = re2pablo_helper(r, cg_state)
  • proto/RE/Haskell/REparse.hs

    r3828 r3856  
    103103
    104104extend_item :: (RE, String) -> (ParseResult, String)
    105 extend_item(r, '*':more) = extend_item(Rep(r, 0, Unbounded), more)
    106 extend_item(r, '?':more) = extend_item(Rep(r, 0, UpperBound 1), more)
    107 extend_item(r, '+':more) = extend_item(Rep(r, 1, Unbounded), more)
     105extend_item(r, '*':more) = extend_item(Rep(r, 0, unboundedRep), more)
     106extend_item(r, '?':more) = extend_item(Rep(r, 0, 1), more)
     107extend_item(r, '+':more) = extend_item(Rep(r, 1, unboundedRep), more)
    108108extend_item(r, '{':more) =
    109109  case parseInt(more) of
    110     (Just i, '}' : even_more) -> extend_item(Rep(r, i, UpperBound i), even_more)
    111     (Just i, ',':'}': even_more) -> extend_item(Rep(r, i, Unbounded), even_more)
     110    (Just i, '}' : even_more) -> extend_item(Rep(r, i, i), even_more)
     111    (Just i, ',':'}': even_more) -> extend_item(Rep(r, i, unboundedRep), even_more)
    112112    (Just i, ',': even_more) ->
    113113      case parseInt(even_more) of
    114         (Just j, '}' : remaining) -> extend_item(Rep(r, i, UpperBound j), remaining)
     114        (Just j, '}' : remaining) -> extend_item(Rep(r, i, j), remaining)
    115115        _ -> (ParseFailure "Bad upper bound", even_more)
    116116    _ -> (ParseFailure "Bad lower bound", more)
Note: See TracChangeset for help on using the changeset viewer.