-- Module Nullable provides functions for optimizing a
-- grep search expression to remove any "nullable" prefix.
-- Robert D. Cameron, 2013
module NullAble (minMatchLen, removeNullablePrefix) where
import CanonicalRE
-- Write a minimal match length function that determines the minimum length
-- string that can be matched by an RE, considering that Start and End match
-- single \n character.
minMatchLen :: RE -> Int
minMatchLen (CC s) = 1
minMatchLen Start = 1
minMatchLen End = 1
minMatchLen (Seq []) = 0
minMatchLen (Seq (r:rs)) = minMatchLen r + (minMatchLen (Seq rs))
minMatchLen (Alt [r]) = minMatchLen r
minMatchLen (Alt (r:rs)) = min (minMatchLen r) (minMatchLen (Alt rs))
minMatchLen (Rep (r, i, j)) = i * (minMatchLen r)
--- removeNullablePrefix takes a regular expression and returns
--- the version of it that has been transformed to remove any nullable
--- prefixeProto. A prefix p is nullable if it can possibly match the empty
--- string, i.e., minMatchLen(p) = 0. For example, in the regexp
--- [a-z]*-[0-9]+ the prefix [a-z]* is nullable.
removeNullablePrefix :: RE -> RE
removeNullablePrefix (CC s) = (CC s)
removeNullablePrefix Start = Start
removeNullablePrefix End = End
removeNullablePrefix (Seq []) = Seq []
removeNullablePrefix (Seq (r:rs))
| minMatchLen(r) == 0 = removeNullablePrefix(Seq rs)
| otherwise = Seq ((removeNullablePrefix r):rs)
removeNullablePrefix (Alt rs) = Alt (map removeNullablePrefix rs)
removeNullablePrefix (Rep (r, 0, ub)) = Seq []
removeNullablePrefix (Rep (r, lb, ub))
| minMatchLen(r) == 0 = Seq []
| otherwise = Seq [removeNullablePrefix(r), Rep (r, lb-1, upper_bound_dec ub)]
upper_bound_dec :: RepLimit -> RepLimit
upper_bound_dec (UpperBound u) = UpperBound (u - 1)
upper_bound_dec Unbounded = Unbounded