Changeset 3620 for proto


Ignore:
Timestamp:
Feb 3, 2014, 5:22:48 AM (5 years ago)
Author:
cameron
Message:

Simplified list of range representation; insert1, insertRange functions

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/Haskell/SparseCharSet.hs

    r3619 r3620  
    11-- Module SparseCharSet provides a data structure for
    2 -- representing the characters of a character class based
    3 -- using a combination of small-block bitmaps and block
    4 -- ranges.   This provides a compact representation
    5 -- assuming for character sets that are either sparse
    6 -- or dense.
     2-- representing the characters of a character class as a
     3-- sorted list of ranges.
    74
    85
     
    129import Data.Bits
    1310
    14 blockSize :: Int
    15 blockSize = 128
    16 unicodeMaxBlock :: Int
    17 unicodeMaxBlock = div 0x10FFFF blockSize
    18 --
    19 -- CharBlockMap(base, map) defines a bitmap for characters in
    20 -- the range [base * blockSize, (base + 1) * blockSize)
    21 -- CharBlockRange(lo, hi) defines a continuous range of characters
    22 -- [lo * blockSize, (hi + 1) * blockSize)
     11type CodePoint = Int
     12unicodeMax :: CodePoint
     13unicodeMax = 0x10FFFF
    2314
    24 data CharSetItem = CharBlockMap(Int, Integer) | CharBlockRange(Int, Int) deriving Show
     15data CharSetItem = CharRange(CodePoint, CodePoint) deriving Show
    2516
    2617-- A character class is represented as a list of CharSetItems,
     
    3021
    3122-- Determine whether a character code is in a set
    32 member :: (Int, [CharSetItem]) -> Bool
     23member :: (CodePoint, [CharSetItem]) -> Bool
    3324member (c, []) = False
    34 member (c, CharBlockMap(base, blkmap):more)
    35   | (div c blockSize) == base  = testBit blkmap (mod c blockSize)
    36   | otherwise  =  member(c, more)
    37 member (c, CharBlockRange(lo, hi):more)
    38   | b < lo  =  False
    39   | b > hi  = member(c, more)
     25member (c, CharRange(lo, hi):more)
     26  | c < lo  =  False
     27  | c > hi  = member(c, more)
    4028  | otherwise = True
    41   where b = div c blockSize
    4229
    43 negateClass :: [CharSetItem] -> [CharSetItem]
     30
     31-- Insert a codepoint into a class
     32insert1 :: (CodePoint, SparseCharClass) -> SparseCharClass
     33insert1 (c, cc) = insertRange(c, c, cc)
     34
     35-- Insert a range of codepoints into a class
     36insertRange :: (CodePoint, CodePoint, SparseCharClass) -> SparseCharClass
     37insertRange(lo, hi, []) = [CharRange(lo, hi)]
     38insertRange(lo, hi, CharRange(a, b):more)
     39  | hi < a-1      = CharRange(lo, hi):CharRange(a, b):more
     40  | lo > b+1      = CharRange(a, b):insertRange(lo, hi, more)
     41-- Ranges overlap, insert the combined range
     42  | otherwise     = insertRange(min a lo, max b hi, more)
     43
     44negateClass :: SparseCharClass -> SparseCharClass
    4445
    4546negateClass c = negateClass_helper(c, 0)
    4647
    4748negateClass_helper([], b)
    48   | b > unicodeMaxBlock =  []
    49   | otherwise           = [CharBlockRange(b, unicodeMaxBlock)]
     49  | b > unicodeMax =  []
     50  | otherwise           = [CharRange(b, unicodeMax)]
    5051
    51 negateClass_helper(m@(CharBlockMap(base, map):more), b)
    52   |  b < base        = CharBlockRange(b, base - 1): (negateClass_helper(m, b))
    53   |  otherwise       = CharBlockMap(base, xor map ((shiftL 1 blockSize) - 1)): (negateClass_helper(more, base+1))
    54 
    55 negateClass_helper(CharBlockRange(lo, hi):more, b)
    56   |  b < lo          = CharBlockRange(b, lo-1): (negateClass_helper(more, hi+1))
     52negateClass_helper(CharRange(lo, hi):more, b)
     53  |  b < lo          = CharRange(b, lo-1): (negateClass_helper(more, hi+1))
    5754  |  otherwise       = negateClass_helper(more, hi+1)
    5855
    5956
    60 
Note: See TracChangeset for help on using the changeset viewer.