source: proto/RE2Pablo/ref/re2pbs/bin/codepoint_symbol_table.py @ 2226

Last change on this file since 2226 was 2226, checked in by ksherdy, 7 years ago

Initial check in.

File size: 3.8 KB
Line 
1# -*- coding: utf-8 -*-
2
3#
# Objects and methods to support the ANTLR grammar (Python)
5# Regular Expressions to Parallel Bit Stream equation generator.
6#
7# Copyright (c) 2011, Ken Herdy
8#
9# Version 0.7 - April 15, 2011
10#
11# --------------------------------------------------------------------------------
12#  Code Point Symbol Table
13# --------------------------------------------------------------------------------
14import itertools
15
16# --------------------------------------------------------------------------------
17# Code Point Symbol
18#
# Code point symbol values are stored as a Python list of lists; each
# inner list has length 1 (a single code point) or length 2 (a range).
21#
22# Codepoint values 0 - 65535 are supported.
23#
24# Duplicate code point values are removed on insertion.
25#
26# Incremental insertion is supported.
27# --------------------------------------------------------------------------------
28from charset_def import CharSetDef
29
class BadCodePointException(Exception):
    """Raised when a code point, or a code point range, is not acceptable.

    Defined as an exception class (rather than a single shared Exception
    instance) so that callers can write ``except BadCodePointException`` and
    so that each raise produces its own exception object. A bare
    ``raise BadCodePointException`` keeps working.
    """
31
class CodePointSymbolTable:
    """Table of symbols indexed by key.

    The first symbol inserted under a given key is kept; later insertions
    using the same key are silently ignored.
    """

    def __init__(self):
        # key -> symbol, filled in by insert().
        self.symbols = {}

    def insert(self, key, symbol):
        """Store `symbol` under `key` unless `key` is already present."""
        if self.lookup(key):
            return
        self.symbols[key] = symbol

    def lookup(self, key):
        """Return True if `key` is present in the table, otherwise False."""
        return key in self.symbols

    def debug(self):
        """Print the result of debug() for every stored symbol."""
        for symbol in self.symbols.values():
            # A single parenthesised argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(symbol.debug())
52
53# --------------------------------------------------------------------------------
class CodePointSymbol():
    """A set of code points stored as a sorted list of lists.

    Each entry of `codepoints` is either ``[cp]`` (a single code point) or
    ``[lower, upper]`` (a range). Entries are kept sorted and exact duplicate
    entries are dropped after every insertion.
    """

    def __init__(self, invert=False):
        self.codepoints = []
        self.invert = invert

    # Incremental Setter
    def add_codepoint(self, codepoint, invert=False):
        """Add a single code point.

        Raises BadCodePointException if `codepoint` is outside 0..65535.

        NOTE(review): `invert` overwrites the flag on every call, so a value
        passed to the constructor is reset to this call's argument (False by
        default). Kept as-is for compatibility; confirm that it is intended.
        """
        if not self.is_valid_codepoint(codepoint):
            raise BadCodePointException

        self.codepoints.append([codepoint])
        self.canonicalize()

        self.invert = invert

    # Incremental Setter
    def add_codepoint_range(self, codepoint_lower, codepoint_upper, invert=False):
        """Add the range ``[codepoint_lower, codepoint_upper]``.

        Raises BadCodePointException if either bound is outside 0..65535 or
        if the lower bound exceeds the upper bound. As with add_codepoint(),
        `invert` overwrites the symbol's current flag.
        """
        # Both bounds must be valid: reject the range as soon as either one
        # is out of bounds, not only when both are.
        if (not self.is_valid_codepoint(codepoint_lower)
                or not self.is_valid_codepoint(codepoint_upper)
                or codepoint_lower > codepoint_upper):
            raise BadCodePointException

        self.codepoints.append([codepoint_lower, codepoint_upper])
        self.canonicalize()

        self.invert = invert

    # Helper
    def is_valid_codepoint(self, codepoint):
        """Return True if `codepoint` lies in the supported range 0..65535."""
        return 0 <= codepoint <= 65535

    # Helper - sort and remove duplicates
    def canonicalize(self):
        """Sort the entries and drop entries equal to their predecessor."""
        self.codepoints.sort()
        # groupby() on the sorted list yields each distinct entry once.
        self.codepoints = [entry for entry, _ in itertools.groupby(self.codepoints)]

    # Getters
    def key(self):
        """Return the table key for this symbol.

        The key is "s" followed by the entries joined with "_"; the two
        bounds of a range are joined with "__" (e.g. "s65_97__122").
        """
        key_list = ["__".join(str(value) for value in item)
                    for item in self.codepoints]
        return "s" + "_".join(key_list)

    def debug(self):
        """Return a one-line description: key, entries and invert flag."""
        return "Key: %s Value: %s Invert: %s" % (self.key(), self.codepoints, self.invert)
100
101# --------------------------------------------------------------------------------
def generate_chardefsets(symbol_table_items, prefix=''):
    """Build one CharSetDef per (key, symbol) pair.

    Each definition is constructed from `prefix` + key, the symbol's code
    points converted by codepoints2chardefsets(), and the symbol's invert
    flag. The definitions are returned as a list in iteration order.
    """
    return [
        CharSetDef(prefix + name,
                   codepoints2chardefsets(entry.codepoints),
                   entry.invert)
        for name, entry in symbol_table_items
    ]
107
def codepoints2chardefsets(codepointss):
    """Translate a list of code point lists into CharSetDef argument strings.

    A one-element entry ``[cp]`` becomes the character ``chr(cp)``; a
    two-element entry ``[lo, hi]`` becomes ``chr(lo) + '-' + chr(hi)``.
    Entries of any other length raise BadCodePointException.

    NOTE(review): chr() only accepts 0..255 under Python 2, so code points
    above 255 would raise ValueError there - confirm the target interpreter.
    """
    chardefs = []

    for entry in codepointss:
        size = len(entry)

        if size == 1:
            chardefs.append(chr(entry[0]))
        elif size == 2:
            chardefs.append('-'.join((chr(entry[0]), chr(entry[1]))))
        else:
            # Empty entries and entries with more than two values are invalid.
            raise BadCodePointException

    return chardefs
125
126       
def test():
    """Smoke test: build one symbol, store it in a table and print both."""
    symbol_table = CodePointSymbolTable()
    symbol = CodePointSymbol()

    # Adding the same code point twice exercises duplicate removal.
    symbol.add_codepoint(65)
    symbol.add_codepoint(65)

    symbol_table.insert(symbol.key(), symbol)

    symbol_table.debug()

    # CodePointSymbolTable has no keys() method, so read the keys from the
    # underlying dictionary instead of calling symbol_table.keys().
    print(sorted(symbol_table.symbols))


if __name__ == "__main__":
    test()
Note: See TracBrowser for help on using the repository browser.