source: proto/RE2PBS/bin/codepoint_symbol_table.py @ 1130

Last change on this file since 1130 was 1130, checked in by ksherdy, 8 years ago

Initial check in.

File size: 3.6 KB
Line 
1# -*- coding: utf-8 -*-
2
3#
4# Objects and methods to support AnTLR Grammar (Python)
5# Regular Expressions to Parallel Bit Stream equation generator.
6#
7#  Copyright (c) 2011, Ken Herdy
8#
9#  Version 0.7 - April 15, 2011
10#
11# --------------------------------------------------------------------------------
12#  Code Point Symbol Table
13# --------------------------------------------------------------------------------
14import itertools
15
16# --------------------------------------------------------------------------------
17# Code Point Symbol
18#
19# Codepoint symbol values are stored as python 'lists of lists'
20# of length 1 (single codepoints) or length 2 (ranges)
21#
22# Codepoint values 0 - 65535 are supported.
23#
24# Duplicate code point values are removed on insertion.
25#
26# Incremental insertion is supported.
27# --------------------------------------------------------------------------------
class BadCodePointException(Exception):
    """Raised when a code point, code point range, or code point list is rejected.

    Defined as a real exception class (not an Exception instance) so that
    callers can catch it with ``except BadCodePointException:``.
    ``raise BadCodePointException`` continues to work unchanged.
    """
29
class CodePointSymbolTable:
    """Table mapping symbol keys to symbol objects.

    The first symbol inserted under a key wins; later insertions that use
    an already-present key are silently ignored.
    """

    def __init__(self):
        # key -> symbol object, exactly as supplied to insert().
        self.symbols = {}

    def insert(self, key, symbol):
        """Store `symbol` under `key` unless `key` is already present."""
        if self.lookup(key):
            return
        self.symbols[key] = symbol

    def lookup(self, key):
        """Return True if `key` has been inserted, False otherwise."""
        # Membership on the dict itself avoids building a key list.
        return key in self.symbols

    def debug(self):
        """Print the debug() string of every stored symbol."""
        for symbol in self.symbols.values():
            # Single-argument print(...) behaves the same as the Python 2
            # print statement and is also valid Python 3.
            print(symbol.debug())
50
class CodePointSymbol():
    """A set of code points stored as a sorted 'list of lists'.

    Each entry of `codepoints` is either ``[cp]`` (a single code point) or
    ``[lower, upper]`` (a range). Code point values 0 - 65535 are accepted.
    Entries are kept sorted and exact duplicate entries are removed after
    every insertion; overlapping ranges are not merged.
    """

    def __init__(self, invert=False):
        self.codepoints = []
        self.invert = invert

    # Incremental Setter
    def add_codepoint(self, codepoint, invert=False):
        """Add a single code point.

        Raises BadCodePointException if `codepoint` is outside 0 - 65535.
        """
        if not self.is_valid_codepoint(codepoint):
            raise BadCodePointException

        self.codepoints.append([codepoint])
        self.canonicalize()

        # NOTE: every add overwrites the invert flag, including one that
        # was passed to the constructor.
        self.invert = invert

    # Incremental Setter
    def add_codepoint_range(self, codepoint_lower, codepoint_upper, invert=False):
        """Add the range ``[codepoint_lower, codepoint_upper]``.

        Raises BadCodePointException if either endpoint is outside
        0 - 65535 or if the lower endpoint exceeds the upper one.
        """
        # Reject the range when EITHER endpoint is invalid; the previous
        # 'and' only rejected ranges whose endpoints were both invalid.
        if (not self.is_valid_codepoint(codepoint_lower)
                or not self.is_valid_codepoint(codepoint_upper)
                or codepoint_lower > codepoint_upper):
            raise BadCodePointException

        self.codepoints.append([codepoint_lower, codepoint_upper])
        self.canonicalize()

        # NOTE: overwrites the invert flag, as in add_codepoint().
        self.invert = invert

    # Helper
    def is_valid_codepoint(self, codepoint):
        """Return True if `codepoint` lies in the supported range 0 - 65535."""
        return 0 <= codepoint <= 65535

    # Helper - sort and remove duplicates
    def canonicalize(self):
        """Sort the entries and drop exact duplicate entries."""
        self.codepoints.sort()
        # groupby() on the sorted list yields each distinct entry once.
        self.codepoints = [entry for entry, _ in itertools.groupby(self.codepoints)]

    # Getters
    def key(self):
        """Return the symbol key string.

        The key is ``"s"`` followed by the entries joined with ``"_"``;
        the two endpoints of a range are joined with ``"__"``
        (e.g. ``[[65], [97, 122]]`` -> ``"s65_97__122"``).
        """
        parts = ["__".join(str(value) for value in entry)
                 for entry in self.codepoints]
        return "s" + "_".join(parts)

    def debug(self):
        """Return a one-line description of the key, entries and invert flag."""
        return "Key: %s Value: %s Invert: %s" % (self.key(), self.codepoints, self.invert)
97
98#
99#       Utility Functions       
100#
101# --------------------------------------------------------------------------------
102# Utility Functions     
103# --------------------------------------------------------------------------------
def CodePointLists2CharSetDefList(codepointss):
    """Translate 'lists of lists' of code points to Character Class Compiler
    CharSetDef argument list syntax.

    Each entry of `codepointss` must be ``[cp]`` or ``[lower, upper]``.
    A single code point becomes its one-character string; a range becomes
    ``"<lower char>-<upper char>"``. Returns the list of those strings in
    input order.

    Raises BadCodePointException if an entry does not have length 1 or 2.
    """
    # Python 2's chr() only accepts 0-255 although code points up to 65535
    # are supported, so wider values need unichr() there. On Python 3
    # chr() covers the full range and unichr does not exist.
    try:
        wide_chr = unichr  # noqa: F821 - Python 2 only
    except NameError:
        wide_chr = chr

    def to_char(codepoint):
        # Keep plain chr() for 0-255 so narrow results are unchanged.
        return chr(codepoint) if codepoint < 256 else wide_chr(codepoint)

    rv = []

    for codepoints in codepointss:
        if len(codepoints) not in (1, 2):
            raise BadCodePointException
        rv.append('-'.join(to_char(codepoint) for codepoint in codepoints))

    return rv
121       
122
def test():
    """Smoke test: add code point 65 twice, register the symbol, dump the table."""
    sym = CodePointSymbol()
    for _ in range(2):
        # Adding the same code point twice exercises duplicate removal.
        sym.add_codepoint(65)

    table = CodePointSymbolTable()
    table.insert(sym.key(), sym)

    table.debug()
135
# Run the smoke test when this module is executed as a script.
if __name__ == "__main__":
    test()
Note: See TracBrowser for help on using the repository browser.