Changeset 3888


Ignore:
Timestamp:
Jun 21, 2014, 6:24:39 PM (5 years ago)
Author:
cameron
Message:

removeNullablePrefix, removeNullableSuffix optimizations

Location:
proto/RE/Haskell
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/Haskell/CanonicalRE.hs

    r3887 r3888  
    7575   | ub1 == unboundedRep && lb1 <= 1   = Rep(r, lb1 * lb2, unboundedRep)
    7676   | ub1 == unboundedRep && lb2 == 0   = Rep(Rep(r, lb1, unboundedRep), 0, 1)
    77    | lb2 == ub2                        = Rep(r, lb1 * lb2, ub1 * ub2)
    78    | ub1 * lb2 >= lb1 * (lb2 + 1) - 1  = Rep(r, lb1 * lb2, ubCombine(ub1, ub2))
     77   | lb2 == ub2                        = mkRep(r, lb1 * lb2, ub1 * ub2)
     78   | ub1 * lb2 >= lb1 * (lb2 + 1) - 1  = mkRep(r, lb1 * lb2, ubCombine(ub1, ub2))
    7979   | otherwise = Rep(Rep(r, lb1, ub1), lb2, ub2)
    80 mkRep(r, lb, ub) = Rep(r, lb, ub)
     80mkRep(r, lb, ub)
     81   | lb == 1 && ub == 1  = r
     82   | otherwise           = Rep(r, lb, ub)
    8183
    8284
  • proto/RE/Haskell/Nullable.hs

    r3856 r3888  
    22-- grep search expression to remove any "nullable" prefix.
    33
    4 -- Robert D. Cameron, 2013
     4-- Robert D. Cameron, 2014
    55
    6 module NullAble (minMatchLen, removeNullablePrefix) where
     6module Nullable (isNullable, removeNullablePrefix, removeNullableSuffix) where
    77       
    88import CanonicalRE
    99
    10 -- Write a minimal match length function that determines the minimum length
    11 -- string that can be matched by an RE, considering that Start and End match
    12 -- single \n character.
    1310
    14 minMatchLen :: RE -> Int
    15 minMatchLen (CC s) = 1
    16 minMatchLen Start = 1
    17 minMatchLen End = 1
    18 minMatchLen (Seq []) = 0
    19 minMatchLen (Seq (r:rs)) = minMatchLen r + (minMatchLen (Seq rs))
    20 minMatchLen (Alt [r]) = minMatchLen r
    21 minMatchLen (Alt (r:rs)) = min (minMatchLen r) (minMatchLen (Alt rs))
    22 minMatchLen (Rep (r, i, j)) = i * (minMatchLen r)
     11-- A regular expression is nullable if it (a) matches the empty
     12-- string, and (b) applies everywhere.  Note that Start (^) and
     13-- End ($) match the empty string, but not everywhere).
     14
     15isNullable :: RE -> Bool
     16isNullableSeq :: [RE] -> Bool
     17isNullableAlt :: [RE] -> Bool
     18
     19isNullable (CC s) = False
     20isNullable Start = False
     21isNullable End = False
     22isNullable (Seq x) = isNullableSeq x
     23isNullable (Alt x) = isNullableAlt x
     24isNullable (Rep (r, i, j))
     25  | i == 0     = True
     26  | otherwise  = isNullable(r)
     27
     28isNullableSeq []       = True
     29isNullableSeq (a:more)
     30  | isNullable(a)      = isNullableSeq(more)
     31  | otherwise          = False
     32
     33isNullableAlt []       = False
     34isNullableAlt (a:more)
     35  | isNullable(a)      = True
     36  | otherwise          = isNullableAlt(more)
    2337
    2438--- removeNullablePrefix takes a regular expression and returns
     
    2943
    3044removeNullablePrefix :: RE -> RE
    31 removeNullablePrefix (CC s) = (CC s)
    32 removeNullablePrefix Start = Start
    33 removeNullablePrefix End = End
    34 removeNullablePrefix (Seq []) = Seq []
    35 removeNullablePrefix (Seq (r:rs))
    36    | minMatchLen(r) == 0  = removeNullablePrefix(Seq rs)
    37    | otherwise            = Seq ((removeNullablePrefix r):rs)
    38 removeNullablePrefix (Alt rs) = Alt (map removeNullablePrefix rs)
    39 removeNullablePrefix (Rep (r, 0, ub)) = Seq []
    40 removeNullablePrefix (Rep (r, lb, ub))
    41    | minMatchLen(r) == 0  = Seq []
    42    | otherwise            = Seq [removeNullablePrefix(r), Rep (r, lb-1, lb-1)]
     45removeNullablePrefix (Seq s) = Seq (removeNullableSeqPrefix(s))
     46removeNullablePrefix (Alt as) = Alt (map removeNullablePrefix as)
     47removeNullablePrefix (Rep(r, lb, ub))
     48   | lb == 0        = Seq []
     49   | isNullable(r)  = Seq []
     50   | otherwise      = Seq [removeNullablePrefix(r), Rep(r, lb-1, lb-1)]
     51-- default: do nothing
     52removeNullablePrefix r = r
    4353
     54removeNullableSeqPrefix :: [RE] -> [RE]
     55removeNullableSeqPrefix [] = []
     56removeNullableSeqPrefix (a:more)
     57  | isNullable(a)       = removeNullableSeqPrefix(more)
     58  | otherwise           = removeNullablePrefix(a):more
    4459
     60removeNullableSuffix :: RE -> RE
     61removeNullableSuffix (Seq s) = Seq (removeNullableSeqSuffix(s))
     62removeNullableSuffix (Alt as) = Alt (map removeNullableSuffix as)
     63removeNullableSuffix (Rep(r, lb, ub))
     64   | lb == 0        = Seq []
     65   | isNullable(r)  = Seq []
     66   | otherwise      = Seq [Rep(r, lb-1, lb-1), removeNullableSuffix(r)]
     67-- default: do nothing
     68removeNullableSuffix r = r
     69
     70removeNullableSeqSuffix :: [RE] -> [RE]
     71removeNullableSeqSuffix [] = []
     72removeNullableSeqSuffix (a:more)
     73  | isNullableSeq(more)       = [removeNullableSuffix(a)]
     74  | otherwise                 = a:removeNullableSeqSuffix(more)
  • proto/RE/Haskell/hgrep.hs

    r3886 r3888  
    66
    77import CanonicalRE
     8import Nullable
    89import REparse
    910import RunPablo
     
    5960         (ParseSuccess r) -> do
    6061            srcText <- readFile srcfile
    61             let matches = search_all(simplify(r), lines srcText)
     62            let r1 = simplify(removeNullableSuffix(removeNullablePrefix(r)))
     63                matches = search_all(r1, lines srcText)
    6264            if (opts == [CountLines]) then do
    6365                hPutStrLn stdout (show (length matches))
  • proto/RE/Haskell/simplify.hs

    r3887 r3888  
    66import CanonicalRE
    77import REparse
    8 import RunPablo
    9 import REcompile
     8import Nullable
    109import Data.List
    1110
     
    1716--
    1817
    19 header = "Usage: simplify (-h | -v | [-c] regexp file)"
    20 version = "simplify 0.1"
     18header = "Usage: simplify (-h | -v | -n regexp)"
     19version = "simplify 0.2"
    2120
    22 data GrepFlags = Help | Version deriving Eq
    23 simplifyOptions :: [OptDescr GrepFlags]
     21data SimplifyFlags = Help | Version | Nullable deriving Eq
     22simplifyOptions :: [OptDescr SimplifyFlags]
    2423simplifyOptions =
    25        [Option ['h'] ["help"]    (NoArg Help)        "Display help and exit.",
    26         Option ['v'] ["version"] (NoArg Version)     "Show the version number and exit."]
     24       [Option ['h'] ["help"]     (NoArg Help)        "Display help and exit.",
     25        Option ['v'] ["version"]  (NoArg Version)     "Show the version number and exit.",
     26        Option ['n'] ["nullable"] (NoArg Nullable)    "Remove nullable prefixes and suffixes."]
    2727
    2828usageMsg = usageInfo header simplifyOptions
     
    4848  case parseRE(regexp) of
    4949         (ParseSuccess r) -> do
    50             hPutStrLn stdout (show (simplify(r)))
     50            if (opts == [Nullable]) then do
     51                hPutStrLn stdout (show (simplify (removeNullableSuffix (removeNullablePrefix r))))
     52            else do
     53                hPutStrLn stdout (show (simplify r))
    5154            exitWith ExitSuccess
    5255         (ParseFailure m) -> do
Note: See TracChangeset for help on using the changeset viewer.