source: proto/charsetcompiler/charset_def.py @ 4911

Last change on this file since 4911 was 3949, checked in by cameron, 5 years ago

Add CanonicalCharSetDef constructor, default UTF-8 basis pattern

File size: 1.8 KB
Line 
1# -*- coding: utf-8 -*-
2#
3#  Character sets are defined as lists of items that
4#  are either individual characters or ranges of contiguous
5#  characters.
6#
7#  Aug. 1, 2014 - convert to canonical list of ranges
8#
class CharSetDef:
    """A named character set, stored as a list of code point ranges.

    Each item supplied to the constructor is a string: either a single
    character, or a range whose first and last characters are its bounds.
    Examples:

        CharSetDef('alpha_', ['a-z', 'A-Z', '_'])
        CharDef('semicolon', ';')   (equiv. to CharSetDef('semicolon', [';']))

    Attributes set by the constructor:
        name         -- the name given to the set
        items        -- the result of canonical_range_list(items)
        complemented -- the value of the ``invert`` argument
    """

    def __init__(self, name, items, invert = False):
        self.name = name
        # Store the set in the form produced by canonical_range_list
        # rather than as the raw item strings.
        canonical = canonical_range_list(items)
        self.items = canonical
        self.complemented = invert

    def show(self):
        """Return a constructor-like string describing this definition.

        The trailing ``True`` argument appears only for a complemented set.
        """
        template = "CharSetDef(%s, %s)"
        if self.complemented:
            template = "CharSetDef(%s, %s, True)"
        return template % (self.name, self.items)
22
23       
class CharDef(CharSetDef):
    """A character set consisting of exactly one character.

    ``CharDef(name, c)`` describes the same set as ``CharSetDef(name, [c])``.
    ``items`` holds the single range ``(ord(c), ord(c))``.
    """

    def __init__(self, name, char, invert = False):
        """Create the definition.

        name   -- the name given to the set
        char   -- a one-character string; ``ord`` raises TypeError otherwise
        invert -- True if the set is to be treated as complemented
        """
        codepoint = ord(char)
        self.name = name
        self.items = [(codepoint, codepoint)]
        self.complemented = invert

    def show(self):
        """Return a constructor-like string with the character shown in hex.

        The character is rendered as a backslash followed by its code point
        in upper-case hexadecimal, e.g. ``'\\3B'`` for a semicolon.
        """
        # items[0] is a (lo, hi) tuple of integer code points, not a
        # character, so the code point is read directly; applying ord()
        # to the tuple would raise TypeError.
        codepoint = self.items[0][0]
        if self.complemented:
            return "CharDef(%s, '\\%X', True)" % (self.name, codepoint)
        return "CharDef(%s, '\\%X')" % (self.name, codepoint)
33
class CanonicalCharSetDef(CharSetDef):
    """A character set whose items are stored exactly as supplied.

    This constructor does not pass ``items`` through canonical_range_list.
    NOTE(review): presumably callers supply a list that is already in
    canonical (lo, hi) range form -- confirm against call sites.
    """

    def __init__(self, name, items, invert = False):
        # Plain attribute assignment; no conversion or validation is done.
        self.name, self.items, self.complemented = name, items, invert
39
40
def canonical_range_list(items):
    """Convert character-set items to a sorted list of merged ranges.

    items -- an iterable of non-empty strings.  The first and last
             characters of each string are the bounds of a range, so 'a-z'
             denotes a through z and a single character such as '_'
             denotes just itself.

    Returns a list of (lo, hi) tuples of integer code points in ascending
    order, with overlapping and adjacent ranges merged.  Empty input gives
    an empty list.  The caller's ``items`` object is left unmodified.

    Raises Exception if the first character of an item has a higher code
    point than its last.
    """
    # Sort a copy: an in-place sort would reorder the caller's list and
    # would not work at all for tuples or other non-list iterables.
    ordered = sorted(items)
    if not ordered:
        return []
    ranges = [(ord(item[0]), ord(item[-1])) for item in ordered]
    merged = []
    (lo1, hi1) = ranges[0]
    for (lo, hi) in ranges:
        if lo > hi:
            raise Exception("Bad range (%s, %s)" % (lo, hi))
        if lo <= hi1 + 1:
            # Overlaps or directly follows the pending range: extend it.
            if hi1 < hi:
                hi1 = hi
        else:
            # A gap: the pending range is complete, start a new one.
            merged.append((lo1, hi1))
            (lo1, hi1) = (lo, hi)
    merged.append((lo1, hi1))
    return merged
58 
59
Note: See TracBrowser for help on using the repository browser.