Changeset 3886 for proto


Ignore:
Timestamp:
Jun 21, 2014, 1:29:59 AM (5 years ago)
Author:
cameron
Message:

simplifier for regexps

Location:
proto/RE/Haskell
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/Haskell/CanonicalRE.hs

    r3857 r3886  
    66-- Robert D. Cameron, 2013
    77
    8 module CanonicalRE (RE(..), unboundedRep) where
     8module CanonicalRE (RE(..), unboundedRep, mkSeq, mkAlt, mkRep, simplify) where
    99
    1010import SparseCharSet
     
    4040
    4141
     42-- Optimizing constructors
     43-- mkSeq - make a sequence, but flatten.  Result might not be a Seq.
     44-- If there is only one component for a sequence, simply return that.
     45mkSeq [r] = r
     46mkSeq rs = Seq (mkSeqList(rs))
     47
     48-- Build a list for Seq, flattening subsequences
     49mkSeqList [] = []
     50mkSeqList ((Seq rs): more) = mkSeqList(rs ++ more)
     51mkSeqList (r:rs) = r:(mkSeqList(rs))
     52
     53-- mkAlt make a list of alternatives, but flatten, and combine character classes
     54-- If there is only one alternative, simply return that.
     55mkAlt [r] = r
     56mkAlt rs = Alt (mkAltList(rs))
     57
     58-- Build a list for Alt, flattening alternative subgroups,
     59-- and combining character classes.   We move character
     60-- classes towards the end of the list to ensure that
     61-- all combinations are found.
     62mkAltList [] = []
     63mkAltList ((Alt rs): more) = mkAltList(rs ++ more)
     64mkAltList (CC(cs1):CC(cs2):more) = mkAltList(CC(joinCharSets(cs1, cs2)): more)
     65mkAltList (CC(cs1):a2:more) = mkAltList(a2:CC(cs1):more)
     66mkAltList (r:rs) = r:(mkAltList(rs))
     67
     68ubCombine (h1, h2)
     69  |  h1 == unboundedRep   = unboundedRep
     70  |  h2 == unboundedRep   = unboundedRep
     71  |  otherwise            = h1 * h2
     72
     73mkRep(Rep(r, lb1, ub1), lb2, ub2)
     74   | ub1 == unboundedRep && lb2 > 0    = Rep(r, lb1 * lb2, unboundedRep)
     75   | ub1 == unboundedRep && lb1 <= 1   = Rep(r, lb1 * lb2, unboundedRep)
     76   | ub1 == unboundedRep && lb2 == 0   = Rep(Rep(r, lb1, unboundedRep), 0, 1)
     77   | ub1 * lb2 >= lb1 * (lb2 + 1) - 1   = Rep(r, lb1 * lb2, ubCombine(ub1, ub2))
     78   | otherwise = Rep(Rep(r, lb1, ub1), lb2, ub2)
     79mkRep(r, lb, ub) = Rep(r, lb, ub)
     80
     81
     82-- Recursive simplifier
     83simplify (Alt as) = mkAlt(map simplify as)
     84simplify (Seq as) = mkSeq(map simplify as)
     85simplify (Rep (r, lb, ub)) = mkRep(simplify(r), lb, ub)
     86simplify r = r
  • proto/RE/Haskell/SparseCharSet.hs

    r3826 r3886  
    44
    55
    6 module SparseCharSet (SparseCharClass, CharSetItem(..), elemCC, negateClass, insert1, insertRange) where
     6module SparseCharSet (SparseCharClass, CharSetItem(..), elemCC, negateClass, insert1, joinCharSets, insertRange) where
    77
    88import Data.Char
     
    4242  | otherwise     = insertRange(min a lo, max b hi, more)
    4343
     44joinCharSets([], items) = items
     45joinCharSets(CharRange(lo1, hi1): items1, items2) = insertRange(lo1, hi1, joinCharSets(items1, items2))
     46
    4447negateClass :: SparseCharClass -> SparseCharClass
    4548
  • proto/RE/Haskell/ToUTF8.hs

    r3827 r3886  
    2626--
    2727toUTF8 (CC s) = mkAlt (map rangeToUTF8 s)
    28 
    29 -- Make an Alt, but if there is only one alternative, just return that.
    30 mkAlt [e] = e
    31 mkAlt es = Alt es
    3228
    3329
  • proto/RE/Haskell/hgrep.hs

    r3631 r3886  
    2727
    2828header = "Usage: hgrep (-h | -v | [-c] regexp file)"
    29 version = "hgrep 0.1"
     29version = "hgrep 0.8"
    3030
    3131data GrepFlags = CountLines | Help | Version deriving Eq
     
    5959         (ParseSuccess r) -> do
    6060            srcText <- readFile srcfile
    61             let matches = search_all(r, lines srcText)
     61            let matches = search_all(simplify(r), lines srcText)
    6262            if (opts == [CountLines]) then do
    6363                hPutStrLn stdout (show (length matches))
Note: See TracChangeset for help on using the changeset viewer.