Ignore:
Timestamp:
Apr 17, 2014, 6:20:11 PM (5 years ago)
Author:
cameron
Message:

Update for SparseCharSet

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/Haskell/REparse.hs

    r3614 r3826  
    88       
    99import Data.Char
     10import SparseCharSet
    1011import CanonicalRE
    1112
     
    129130parseRE_unit(s) = parseCC(s)
    130131
     132-- Make a character class from a single character
     133cc1 c = let v = ord c in CC [CharRange(v, v)]
     134
    131135-- parseCC deals with individual characters (unitary character classes)
    132136-- and all other forms specifying classes of characters.
    133 parseCC('.':more) = (ParseSuccess (CC (map chr ([0..9]++[11..127]))), more)
     137parseCC('.':more) = (ParseSuccess (CC [CharRange(0,9), CharRange(11,127)]), more)
    134138--
    135139-- Any of the RE metacharacters may be represented using a backslash escape.
    136140--
    137 parseCC('\\':'?':more) = (ParseSuccess (CC("?")), more)
    138 parseCC('\\':'+':more) = (ParseSuccess (CC("+")), more)
    139 parseCC('\\':'*':more) = (ParseSuccess (CC("*")), more)
    140 parseCC('\\':'(':more) = (ParseSuccess (CC("(")), more)
    141 parseCC('\\':')':more) = (ParseSuccess (CC(")")), more)
    142 parseCC('\\':'{':more) = (ParseSuccess (CC("{")), more)
    143 parseCC('\\':'}':more) = (ParseSuccess (CC("}")), more)
    144 parseCC('\\':'[':more) = (ParseSuccess (CC("[")), more)
    145 parseCC('\\':']':more) = (ParseSuccess (CC("]")), more)
    146 parseCC('\\':'|':more) = (ParseSuccess (CC("|")), more)
    147 parseCC('\\':'.':more) = (ParseSuccess (CC(".")), more)
    148 parseCC('\\':'\\':more) = (ParseSuccess (CC("\\")), more)
     141parseCC('\\':'?':more) = (ParseSuccess (cc1('?')), more)
     142parseCC('\\':'+':more) = (ParseSuccess (cc1('+')), more)
     143parseCC('\\':'*':more) = (ParseSuccess (cc1('*')), more)
     144parseCC('\\':'(':more) = (ParseSuccess (cc1('(')), more)
     145parseCC('\\':')':more) = (ParseSuccess (cc1(')')), more)
     146parseCC('\\':'{':more) = (ParseSuccess (cc1('{')), more)
     147parseCC('\\':'}':more) = (ParseSuccess (cc1('}')), more)
     148parseCC('\\':'[':more) = (ParseSuccess (cc1('[')), more)
     149parseCC('\\':']':more) = (ParseSuccess (cc1(']')), more)
     150parseCC('\\':'|':more) = (ParseSuccess (cc1('|')), more)
     151parseCC('\\':'.':more) = (ParseSuccess (cc1('.')), more)
     152parseCC('\\':'\\':more) = (ParseSuccess (cc1('\\')), more)
    149153--
    150154-- Any other use of backslash is an error.
     
    159163parseCC(c:more)
    160164   | elem c "?+*(){}[]|"   = (ParseFailure "Metacharacter alone", c:more)
    161    | otherwise             = (ParseSuccess (CC [c]), more)
     165   | otherwise             = (ParseSuccess (cc1 c), more)
    162166
    163167
     
    166170-- parseCC_body1(c, s)
    167171parseCC_body :: String -> (ParseResult, String)
    168 parseCC_body1 :: (Char, String, String) -> (ParseResult, String)
    169 parseCC_body0 :: (String, String) -> (ParseResult, String)
     172parseCC_body1 :: (Char, String, SparseCharClass) -> (ParseResult, String)
     173parseCC_body0 :: (String, SparseCharClass) -> (ParseResult, String)
    170174
    171175parseCC_body([]) = (ParseFailure "Unclosed character class", [])
     
    175179
    176180parseCC_body1(a, [], ccSoFar) = (ParseFailure "Unclosed character class", [])
    177 parseCC_body1(a, ']':more, ccSoFar) = (ParseSuccess (CC (ccSoFar ++ [a])), more)
     181parseCC_body1(a, ']':more, ccSoFar) = (ParseSuccess (CC (insert1(ord(']'), ccSoFar))), more)
    178182parseCC_body1(a, [_], ccSoFar) = (ParseFailure "Unclosed character class", [])
    179 parseCC_body1(a, '-':']':more, ccSoFar) = (ParseSuccess (CC [a, '-']), more)
    180 parseCC_body1(a, '-':b:more, ccSoFar) = parseCC_body0(more, ccSoFar ++ (map chr [ord(a)..ord(b)]))
    181 parseCC_body1(a, b:more, ccSoFar) = parseCC_body1(b, more, ccSoFar ++ [a])
     183parseCC_body1(a, '-':']':more, ccSoFar) = (ParseSuccess (CC (insert1(ord(a), insert1(ord('-'), ccSoFar)))), more)
     184parseCC_body1(a, '-':b:more, ccSoFar) = parseCC_body0(more, insertRange(ord(a), ord(b), ccSoFar))
     185parseCC_body1(a, b:more, ccSoFar) = parseCC_body1(b, more, insert1(ord(a), ccSoFar))
    182186
    183187parseCC_body0([], ccSoFar) = (ParseFailure "Unclosed character class", [])
    184188parseCC_body0(']':more, ccSoFar) = (ParseSuccess (CC ccSoFar), more)
    185 parseCC_body0('-':']':more, ccSoFar) = (ParseSuccess (CC (ccSoFar ++ ['-'])), more)
     189parseCC_body0('-':']':more, ccSoFar) = (ParseSuccess (CC (insert1(ord('-'), ccSoFar))), more)
    186190parseCC_body0('-':more, ccSoFar) = (ParseFailure "Bad range in character class", more)
    187191parseCC_body0(a:more, ccSoFar) = parseCC_body1(a, more, ccSoFar)
     
    189193
    190194negateCharClassResult(ParseSuccess (CC s), remaining) =
    191    (ParseSuccess (CC (filter (\c -> not(elem c s)) (map chr ([0..9]++[11..127])))), remaining)
     195   (ParseSuccess (CC (negateClass s)), remaining)
    192196negateCharClassResult(failureResult, remaining) = (failureResult, remaining)
    193197
Note: See TracChangeset for help on using the changeset viewer.