Changeset 3902 for proto/RE/Haskell


Ignore:
Timestamp:
Jun 29, 2014, 5:54:28 AM (5 years ago)
Author:
cameron
Message:

Avoid duplicating the repeated expression, if it can't be simplified

Location:
proto/RE/Haskell
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/Haskell/CanonicalRE.hs

    r3900 r3902  
    1212-- RE is the data type for regular expressions
    1313
    14 data RE = CC SparseCharClass | Start | End | Seq [RE] | Alt [RE] | Rep (RE, Int, Int)
     14data RE = CC (Bool, SparseCharClass) | Start | End | Seq [RE] | Alt [RE] | Rep (RE, Int, Int)
    1515          deriving Show
    1616
     
    2323-- illustrative purposes.
    2424--
    25 -- CC "abcd" represents the character class with the 4 characters a, b, c and d, i.e., [a-d].
     25-- CC (True, [CharRange(97,100)]) represents the character class with the 4 characters a, b, c and d, i.e., [a-d].
     26-- CC (False, [CharRange(97,100)]) represents the character class with all characters but a, b, c and d, i.e., [^a-d].
    2627-- Start represents the ^ metacharacter for start of line or string matching
    2728-- End represents the $ metacharacter for end of line or string matching
     
    6263mkAltList [] = []
    6364mkAltList ((Alt rs): more) = mkAltList(rs ++ more)
    64 mkAltList (CC(cs1):CC(cs2):more) = mkAltList(CC(joinCharSets(cs1, cs2)): more)
     65mkAltList (CC(True, cs1):CC(True, cs2):more) = mkAltList(CC(True, joinCharSets(cs1, cs2)): more)
     66mkAltList (CC(False, cs1):CC(False, cs2):more) = mkAltList(CC(False, joinCharSets(cs1, cs2)): more)
    6567mkAltList (CC(cs1):a2:more) = mkAltList(a2:CC(cs1):more)
    6668mkAltList (r:rs) = r:(mkAltList(rs))
  • proto/RE/Haskell/REcompile.hs

    r3857 r3902  
    6666bounded_rep_helper :: (RE, Int, Int, CodeGenState) -> CodeGenState
    6767
    68 eol_CC = CharClass([CharRange(ord '\n', ord '\n')])
     68eol_CC = CharClass(True, [CharRange(ord '\n', ord '\n')])
    6969
    7070compile(re) = re2pablo_helper(re, (env, [Assign(marker, All(1))], marker))
     
    7575-- character class, then advance 1. 
    7676
    77 re2pablo_helper(CC(c), (env, stmts, last_marker)) = (newenv, stmts ++ [new_stmt], newsym)
     77re2pablo_helper(CC(True, c), (env, stmts, last_marker)) = (newenv, stmts ++ [new_stmt], newsym)
    7878   where
    7979     (newenv, newsym) = gensym(env, "marker")
    8080     new_stmt = Assign (newsym, Advance(And(Var(last_marker), CharClass(c)), 1))
     81
     82re2pablo_helper(CC(False, c), (env, stmts, last_marker)) = (newenv, stmts ++ [new_stmt], newsym)
     83   where
     84     (newenv, newsym) = gensym(env, "marker")
     85     new_stmt = Assign (newsym, Advance(And(Var(last_marker), Nor(eol_CC, CharClass(c))), 1))
    8186
    8287-- To match "^" we must be at the start of line, i.e., one past
  • proto/RE/Haskell/REparse.hs

    r3856 r3902  
    179179
    180180parseCC_body1(a, [], ccSoFar) = (ParseFailure "Unclosed character class", [])
    181 parseCC_body1(a, ']':more, ccSoFar) = (ParseSuccess (CC (insert1(ord(a), ccSoFar))), more)
     181parseCC_body1(a, ']':more, ccSoFar) = (ParseSuccess (CC (True, insert1(ord(a), ccSoFar))), more)
    182182parseCC_body1(a, [_], ccSoFar) = (ParseFailure "Unclosed character class", [])
    183 parseCC_body1(a, '-':']':more, ccSoFar) = (ParseSuccess (CC (insert1(ord(a), insert1(ord('-'), ccSoFar)))), more)
     183parseCC_body1(a, '-':']':more, ccSoFar) = (ParseSuccess (CC (True, insert1(ord(a), insert1(ord('-'), ccSoFar)))), more)
    184184parseCC_body1(a, '-':b:more, ccSoFar) = parseCC_body0(more, insertRange(ord(a), ord(b), ccSoFar))
    185185parseCC_body1(a, b:more, ccSoFar) = parseCC_body1(b, more, insert1(ord(a), ccSoFar))
    186186
    187187parseCC_body0([], ccSoFar) = (ParseFailure "Unclosed character class", [])
    188 parseCC_body0(']':more, ccSoFar) = (ParseSuccess (CC ccSoFar), more)
    189 parseCC_body0('-':']':more, ccSoFar) = (ParseSuccess (CC (insert1(ord('-'), ccSoFar))), more)
     188parseCC_body0(']':more, ccSoFar) = (ParseSuccess (CC (True, ccSoFar)), more)
     189parseCC_body0('-':']':more, ccSoFar) = (ParseSuccess (CC (True, insert1(ord('-'), ccSoFar))), more)
    190190parseCC_body0('-':more, ccSoFar) = (ParseFailure "Bad range in character class", more)
    191191parseCC_body0(a:more, ccSoFar) = parseCC_body1(a, more, ccSoFar)
    192192
    193193
    194 negateCharClassResult(ParseSuccess (CC s), remaining) =
     194negateCharClassResult(ParseSuccess (CC(True, s)), remaining) = ParseSuccess (CC(False, s))
     195negateCharClassResult(ParseSuccess (CC(False, s)), remaining) = ParseSuccess (CC(True, s))
    195196-- should be the following, but there are issues
    196197--   (ParseSuccess (CC (negateClass s)), remaining)
Note: See TracChangeset for help on using the changeset viewer.