source: proto/RE/Haskell/SparseCharSet.hs @ 4223

Last change on this file since 4223 was 3886, checked in by cameron, 5 years ago

simplifier for regexps

File size: 2.6 KB
Line 
1-- Module SparseCharSet provides a data structure for
2-- representing the characters of a character class as a
3-- sorted list of ranges.
4
5
6module SparseCharSet (SparseCharClass, CharSetItem(..), elemCC, negateClass, insert1, joinCharSets, insertRange) where
7
8import Data.Char
9import Data.Bits
10
11type CodePoint = Int
12unicodeMax :: CodePoint
13unicodeMax = 0x10FFFF
14
15data CharSetItem = CharRange(CodePoint, CodePoint) deriving Show
16
17-- A character class is represented as a list of CharSetItems,
18-- kept in sorted order.
19
20type SparseCharClass = [CharSetItem]
21
22-- Determine whether a character code is in a set
23elemCC :: (CodePoint, [CharSetItem]) -> Bool
24elemCC (c, []) = False
25elemCC (c, CharRange(lo, hi):more)
26  | c < lo  =  False
27  | c > hi  = elemCC(c, more)
28  | otherwise = True
29
30-- Insert a codepoint into a class
31insert1 :: (CodePoint, SparseCharClass) -> SparseCharClass
32insert1 (c, cc) = insertRange(c, c, cc)
33
34-- Insert a range of codepoints into a class
35insertRange :: (CodePoint, CodePoint, SparseCharClass) -> SparseCharClass
36insertRange(lo, hi, []) = [CharRange(lo, hi)]
37insertRange(lo, hi, CharRange(a, b):more)
38  | hi < a-1      = CharRange(lo, hi):CharRange(a, b):more
39  | lo > b+1      = CharRange(a, b):insertRange(lo, hi, more)
40-- Ranges overlap, insert the combined range.  But do it
41-- recursively to possibly collapse even further.
42  | otherwise     = insertRange(min a lo, max b hi, more)
43
44joinCharSets([], items) = items
45joinCharSets(CharRange(lo1, hi1): items1, items2) = insertRange(lo1, hi1, joinCharSets(items1, items2))
46
47negateClass :: SparseCharClass -> SparseCharClass
48
49negateClass c = negateClass_helper(c, 0)
50
51negateClass_helper([], b)
52  | b > unicodeMax =  []
53  | otherwise           = [CharRange(b, unicodeMax)]
54
55negateClass_helper(CharRange(lo, hi):more, b)
56  |  b < lo          = CharRange(b, lo-1): (negateClass_helper(more, hi+1))
57  |  otherwise       = negateClass_helper(more, hi+1)
58
59
60-- Remove a codepoint from a class
61remove1 :: (CodePoint, SparseCharClass) -> SparseCharClass
62remove1 (c, cc) = removeRange(c, c, cc)
63
64-- Insert a range of codepoints into a class
65removeRange :: (CodePoint, CodePoint, SparseCharClass) -> SparseCharClass
66removeRange(lo, hi, []) = []
67removeRange(lo, hi, CharRange(a, b):more)
68  | hi < a-1            = CharRange(a, b):more
69  | lo > b+1            = CharRange(a, b):removeRange(lo, hi, more)
70-- Ranges overlap
71  | lo <= a && hi >=b   = removeRange(lo, hi, more)
72  | lo <= a             = CharRange(hi+1, b):more
73  | hi >= b             = CharRange(a, lo-1):removeRange(lo, hi, more)
74  | otherwise           = CharRange(a, lo-1):CharRange(hi+1,b):more
75
Note: See TracBrowser for help on using the repository browser.