source: proto/RE/Haskell/SparseCharSet.hs @ 3621

Last change on this file since 3621 was 3621, checked in by cameron, 5 years ago

remove1, removeRange

File size: 2.4 KB
Line 
1-- Module SparseCharSet provides a data structure for
2-- representing the characters of a character class as a
3-- sorted list of ranges.
4
5
6module SparseCharSet (CharSetItem(..), member, negateClass) where
7
8import Data.Char
9import Data.Bits
10
11type CodePoint = Int
12unicodeMax :: CodePoint
13unicodeMax = 0x10FFFF
14
15data CharSetItem = CharRange(CodePoint, CodePoint) deriving Show
16
17-- A character class is represented as a list of CharSetItems,
18-- kept in sorted order.
19
20type SparseCharClass = [CharSetItem]
21
22-- Determine whether a character code is in a set
23member :: (CodePoint, [CharSetItem]) -> Bool
24member (c, []) = False
25member (c, CharRange(lo, hi):more)
26  | c < lo  =  False
27  | c > hi  = member(c, more)
28  | otherwise = True
29
30-- Insert a codepoint into a class
31insert1 :: (CodePoint, SparseCharClass) -> SparseCharClass
32insert1 (c, cc) = insertRange(c, c, cc)
33
34-- Insert a range of codepoints into a class
35insertRange :: (CodePoint, CodePoint, SparseCharClass) -> SparseCharClass
36insertRange(lo, hi, []) = [CharRange(lo, hi)]
37insertRange(lo, hi, CharRange(a, b):more)
38  | hi < a-1      = CharRange(lo, hi):CharRange(a, b):more
39  | lo > b+1      = CharRange(a, b):insertRange(lo, hi, more)
40-- Ranges overlap, insert the combined range.  But do it
41-- recursively to possibly collapse even further.
42  | otherwise     = insertRange(min a lo, max b hi, more)
43
44negateClass :: SparseCharClass -> SparseCharClass
45
46negateClass c = negateClass_helper(c, 0)
47
48negateClass_helper([], b)
49  | b > unicodeMax =  []
50  | otherwise           = [CharRange(b, unicodeMax)]
51
52negateClass_helper(CharRange(lo, hi):more, b)
53  |  b < lo          = CharRange(b, lo-1): (negateClass_helper(more, hi+1))
54  |  otherwise       = negateClass_helper(more, hi+1)
55
56
57-- Remove a codepoint from a class
58remove1 :: (CodePoint, SparseCharClass) -> SparseCharClass
59remove1 (c, cc) = removeRange(c, c, cc)
60
61-- Insert a range of codepoints into a class
62removeRange :: (CodePoint, CodePoint, SparseCharClass) -> SparseCharClass
63removeRange(lo, hi, []) = []
64removeRange(lo, hi, CharRange(a, b):more)
65  | hi < a-1            = CharRange(a, b):more
66  | lo > b+1            = CharRange(a, b):removeRange(lo, hi, more)
67-- Ranges overlap
68  | lo <= a && hi >=b   = removeRange(lo, hi, more)
69  | lo <= a             = CharRange(hi+1, b):more
70  | hi >= b             = CharRange(a, lo-1):removeRange(lo, hi, more)
71  | otherwise           = CharRange(a, lo-1):CharRange(hi+1,b):more
72
Note: See TracBrowser for help on using the repository browser.