Changeset 3826


Ignore:
Timestamp:
Apr 17, 2014, 6:20:11 PM (4 years ago)
Author:
cameron
Message:

Update for SparseCharSet

Location:
proto/RE/Haskell
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/Haskell/REcompile.hs

    r3615 r3826  
    1212       
    1313import Data.Char
     14import SparseCharSet
    1415import CanonicalRE
    1516import REparse
     
    6465rep_helper :: (RE, Int, RepLimit, CodeGenState) -> CodeGenState
    6566
     67eol_CC = CharClass([CharRange(ord '\n', ord '\n')])
     68
    6669compile(re) = re2pablo_helper(re, (env, [Assign(marker, All(1))], marker))
    6770  where
     
    8487   where
    8588     (newenv, newsym) = gensym(env, "start_of_line_marker")
    86      new_stmt = Assign (newsym, And(Var(last_marker), Not(Advance(Not(CharClass("\n"))))))
     89     new_stmt = Assign (newsym, And(Var(last_marker), Not(Advance(Not(eol_CC)))))
    8790
    8891-- To match "$" we must have reached end of line.
     
    9194   where
    9295     (newenv, newsym) = gensym(env, "end_of_line_marker")
    93      new_stmt = Assign (newsym, And(Var(last_marker), CharClass("\n")))
     96     new_stmt = Assign (newsym, And(Var(last_marker), eol_CC))
    9497
    9598-- For the structured types (Seq, Alt, Rep), just call the specific helper.
  • proto/RE/Haskell/REparse.hs

    r3614 r3826  
    88       
    99import Data.Char
     10import SparseCharSet
    1011import CanonicalRE
    1112
     
    129130parseRE_unit(s) = parseCC(s)
    130131
     132-- Make a character class from a single character
     133cc1 c = let v = ord c in CC [CharRange(v, v)]
     134
    131135-- parseCC deals with individual characters (unitary character classes)
    132136-- and all other forms specifying classes of characters.
    133 parseCC('.':more) = (ParseSuccess (CC (map chr ([0..9]++[11..127]))), more)
     137parseCC('.':more) = (ParseSuccess (CC [CharRange(0,9), CharRange(11,127)]), more)
    134138--
    135139-- Any of the RE metacharacters may be represented using a backslash escape.
    136140--
    137 parseCC('\\':'?':more) = (ParseSuccess (CC("?")), more)
    138 parseCC('\\':'+':more) = (ParseSuccess (CC("+")), more)
    139 parseCC('\\':'*':more) = (ParseSuccess (CC("*")), more)
    140 parseCC('\\':'(':more) = (ParseSuccess (CC("(")), more)
    141 parseCC('\\':')':more) = (ParseSuccess (CC(")")), more)
    142 parseCC('\\':'{':more) = (ParseSuccess (CC("{")), more)
    143 parseCC('\\':'}':more) = (ParseSuccess (CC("}")), more)
    144 parseCC('\\':'[':more) = (ParseSuccess (CC("[")), more)
    145 parseCC('\\':']':more) = (ParseSuccess (CC("]")), more)
    146 parseCC('\\':'|':more) = (ParseSuccess (CC("|")), more)
    147 parseCC('\\':'.':more) = (ParseSuccess (CC(".")), more)
    148 parseCC('\\':'\\':more) = (ParseSuccess (CC("\\")), more)
     141parseCC('\\':'?':more) = (ParseSuccess (cc1('?')), more)
     142parseCC('\\':'+':more) = (ParseSuccess (cc1('+')), more)
     143parseCC('\\':'*':more) = (ParseSuccess (cc1('*')), more)
     144parseCC('\\':'(':more) = (ParseSuccess (cc1('(')), more)
     145parseCC('\\':')':more) = (ParseSuccess (cc1(')')), more)
     146parseCC('\\':'{':more) = (ParseSuccess (cc1('{')), more)
     147parseCC('\\':'}':more) = (ParseSuccess (cc1('}')), more)
     148parseCC('\\':'[':more) = (ParseSuccess (cc1('[')), more)
     149parseCC('\\':']':more) = (ParseSuccess (cc1(']')), more)
     150parseCC('\\':'|':more) = (ParseSuccess (cc1('|')), more)
     151parseCC('\\':'.':more) = (ParseSuccess (cc1('.')), more)
     152parseCC('\\':'\\':more) = (ParseSuccess (cc1('\\')), more)
    149153--
    150154-- Any other use of backslash is an error.
     
    159163parseCC(c:more)
    160164   | elem c "?+*(){}[]|"   = (ParseFailure "Metacharacter alone", c:more)
    161    | otherwise             = (ParseSuccess (CC [c]), more)
     165   | otherwise             = (ParseSuccess (cc1 c), more)
    162166
    163167
     
    166170-- parseCC_body1(c, s)
    167171parseCC_body :: String -> (ParseResult, String)
    168 parseCC_body1 :: (Char, String, String) -> (ParseResult, String)
    169 parseCC_body0 :: (String, String) -> (ParseResult, String)
     172parseCC_body1 :: (Char, String, SparseCharClass) -> (ParseResult, String)
     173parseCC_body0 :: (String, SparseCharClass) -> (ParseResult, String)
    170174
    171175parseCC_body([]) = (ParseFailure "Unclosed character class", [])
     
    175179
    176180parseCC_body1(a, [], ccSoFar) = (ParseFailure "Unclosed character class", [])
    177 parseCC_body1(a, ']':more, ccSoFar) = (ParseSuccess (CC (ccSoFar ++ [a])), more)
     181parseCC_body1(a, ']':more, ccSoFar) = (ParseSuccess (CC (insert1(ord(']'), ccSoFar))), more)
    178182parseCC_body1(a, [_], ccSoFar) = (ParseFailure "Unclosed character class", [])
    179 parseCC_body1(a, '-':']':more, ccSoFar) = (ParseSuccess (CC [a, '-']), more)
    180 parseCC_body1(a, '-':b:more, ccSoFar) = parseCC_body0(more, ccSoFar ++ (map chr [ord(a)..ord(b)]))
    181 parseCC_body1(a, b:more, ccSoFar) = parseCC_body1(b, more, ccSoFar ++ [a])
     183parseCC_body1(a, '-':']':more, ccSoFar) = (ParseSuccess (CC (insert1(ord(a), insert1(ord('-'), ccSoFar)))), more)
     184parseCC_body1(a, '-':b:more, ccSoFar) = parseCC_body0(more, insertRange(ord(a), ord(b), ccSoFar))
     185parseCC_body1(a, b:more, ccSoFar) = parseCC_body1(b, more, insert1(ord(a), ccSoFar))
    182186
    183187parseCC_body0([], ccSoFar) = (ParseFailure "Unclosed character class", [])
    184188parseCC_body0(']':more, ccSoFar) = (ParseSuccess (CC ccSoFar), more)
    185 parseCC_body0('-':']':more, ccSoFar) = (ParseSuccess (CC (ccSoFar ++ ['-'])), more)
     189parseCC_body0('-':']':more, ccSoFar) = (ParseSuccess (CC (insert1(ord('-'), ccSoFar))), more)
    186190parseCC_body0('-':more, ccSoFar) = (ParseFailure "Bad range in character class", more)
    187191parseCC_body0(a:more, ccSoFar) = parseCC_body1(a, more, ccSoFar)
     
    189193
    190194negateCharClassResult(ParseSuccess (CC s), remaining) =
    191    (ParseSuccess (CC (filter (\c -> not(elem c s)) (map chr ([0..9]++[11..127])))), remaining)
     195   (ParseSuccess (CC (negateClass s)), remaining)
    192196negateCharClassResult(failureResult, remaining) = (failureResult, remaining)
    193197
  • proto/RE/Haskell/RunPablo.hs

    r3599 r3826  
    55
    66import Data.Bits
     7import Data.Char
     8import SparseCharSet
    79
    810data PabloE = All(Int) | Var(String) | And(PabloE, PabloE) | Or(PabloE, PabloE) | Not(PabloE)
    9               | CharClass(String) | Advance(PabloE) | MatchStar(PabloE, PabloE)
     11              | CharClass(SparseCharClass) | Advance(PabloE) | MatchStar(PabloE, PabloE)
    1012   deriving Show
    1113
     
    8284computeCC_helper(cc, [], powerOf2) = 0
    8385computeCC_helper(cc, c:cs, powerOf2)
    84    | elem c cc = powerOf2 + computeCC_helper(cc, cs, (powerOf2 * 2))
    85    | otherwise = computeCC_helper(cc, cs, (powerOf2 * 2))
     86   | elemCC(ord c, cc)  = powerOf2 + computeCC_helper(cc, cs, (powerOf2 * 2))
     87   | otherwise       = computeCC_helper(cc, cs, (powerOf2 * 2))
    8688
    8789------------------------------------------------------------------------------
  • proto/RE/Haskell/SparseCharSet.hs

    r3621 r3826  
    44
    55
    6 module SparseCharSet (CharSetItem(..), member, negateClass) where
     6module SparseCharSet (SparseCharClass, CharSetItem(..), elemCC, negateClass, insert1, insertRange) where
    77
    88import Data.Char
     
    2121
    2222-- Determine whether a character code is in a set
    23 member :: (CodePoint, [CharSetItem]) -> Bool
    24 member (c, []) = False
    25 member (c, CharRange(lo, hi):more)
     23elemCC :: (CodePoint, [CharSetItem]) -> Bool
     24elemCC (c, []) = False
     25elemCC (c, CharRange(lo, hi):more)
    2626  | c < lo  =  False
    27   | c > hi  = member(c, more)
     27  | c > hi  = elemCC(c, more)
    2828  | otherwise = True
    2929
Note: See TracChangeset for help on using the changeset viewer.