 Timestamp:
 Sep 19, 2014, 9:35:09 AM (5 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

proto/charsetcompiler/UCD/unicode_set.py
r4177 r4178
  24   24    Mixed = 1
  25   25
  26       - default_log_2_quad_bits = 6
       26  + default_log2_quad_bits = 5
       27  +
       28  + log2_quad_bits = default_log2_quad_bits
       29  + quad_bits = 1 << log2_quad_bits
       30  + mod_quad_bit_mask = quad_bits - 1
       31  + UnicodeQuadCount = 0x110000 / quad_bits # 2**log2_quad_bits codepoints per quad
       32  + FullQuadMask = (1<<(quad_bits)) - 1
       33  + run_bytes = 2
       34  +
  27   35
  28   36    class UCset:
  29       -     def __init__(self, log2_quad_bits = default_log_2_quad_bits):
       37  +     def __init__(self):
  30   38            self.runs = []
  31   39            self.quads = []
  32   40            self.quad_count = 0
  33       -         self.run_bytes = 2
  34       -         self.log2_quad_bits = log2_quad_bits
  35       -         self.quad_bits = 1 << log2_quad_bits
  36       -         self.mod_quad_bit_mask = self.quad_bits - 1
  37       -         self.UnicodeQuadCount = 0x110000 / self.quad_bits # 2**log2_quad_bits codepoints per quad
  38       -         self.FullQuadMask = (1<<(self.quad_bits)) - 1
  39   41
  40   42        # internal methods
   …    …
  53   55            if q == 0:
  54   56                self.append_run(Empty, 1)
  55       -         elif q & self.FullQuadMask == self.FullQuadMask:
       57  +         elif q & FullQuadMask == FullQuadMask:
  56   58                self.append_run(Full, 1)
  57   59            else:
   …    …
  61   63        # printing
  62   64        def showC(self, name, indent = 8, entries_per_line = 4):
  63       -         hex_specifier = "%%#0%ix" % (self.quad_bits/4 + 2)
       65  +         hex_specifier = "%%#0%ix" % (quad_bits/4 + 2)
  64   66            runtype = {-1:"Full", 0:"Empty", 1: "Mixed"}
  65   67            setrep = (" " * indent) + ("%s.runs = {" % name)
   …    …
  83   85
  84   86        def bytes(self):
  85       -         return (len(self.runs) * self.run_bytes) + (len(self.quads) * self.quad_bits/8)
       87  +         return (len(self.runs) * run_bytes) + (len(self.quads) * quad_bits/8)
  86   88
  87   89
   …    …
  89   91    # Set Operations
  90   92    #
  91       - def empty_set(log2_quad_bits = default_log_2_quad_bits):
  92       -     e = UCset(log2_quad_bits)
  93       -     e.runs = [(Empty, e.UnicodeQuadCount)]
       93  + def empty_uset():
       94  +     e = UCset()
       95  +     e.runs = [(Empty, UnicodeQuadCount)]
  94   96        e.quads = []
  95       -     e.quad_count = e.UnicodeQuadCount
       97  +     e.quad_count = UnicodeQuadCount
  96   98        return e
  97   99
  98       - def singleton_set(codepoint, log2_quad_bits = default_log_2_quad_bits):
  99       -     e = UCset(log2_quad_bits)
      100  + def singleton_uset(codepoint):
      101  +     e = UCset()
 100  102        quad_no = codepoint >> log2_quad_bits
 101       -     quad_val = 1 << (codepoint & e.mod_quad_bit_mask)
      103  +     quad_val = 1 << (codepoint & mod_quad_bit_mask)
 102  104        if quad_no > 0: e.append_run(Empty, quad_no)
 103  105        e.append_run(Mixed, 1)
 104  106        e.quads = [quad_val]
 105       -     if quad_no < e.UnicodeQuadCount - 1: e.append_run(Empty, e.UnicodeQuadCount - (quad_no + 1))
 106       -     e.quad_count = e.UnicodeQuadCount
      107  +     if quad_no < UnicodeQuadCount - 1: e.append_run(Empty, UnicodeQuadCount - (quad_no + 1))
      108  +     e.quad_count = UnicodeQuadCount
 107  109        return e
 108  110
 109       - def make_range_set(lo_codepoint, hi_codepoint, log2_quad_bits = default_log_2_quad_bits):
 110       -     e = UCset(log2_quad_bits)
 111       -     lo_quad_no = lo_codepoint >> e.log2_quad_bits
 112       -     hi_quad_no = hi_codepoint >> e.log2_quad_bits
 113       -     lo_offset = lo_codepoint & e.mod_quad_bit_mask
 114       -     hi_offset = hi_codepoint & e.mod_quad_bit_mask
      111  + def range_uset(lo_codepoint, hi_codepoint):
      112  +     e = UCset()
      113  +     lo_quad_no = lo_codepoint >> log2_quad_bits
      114  +     hi_quad_no = hi_codepoint >> log2_quad_bits
      115  +     lo_offset = lo_codepoint & mod_quad_bit_mask
      116  +     hi_offset = hi_codepoint & mod_quad_bit_mask
 115  117        if lo_quad_no > 0: e.append_run(Empty, lo_quad_no)
 116  118        if lo_quad_no == hi_quad_no:
 117       -         quad = (e.FullQuadMask << lo_offset) & (e.FullQuadMask >> (e.quad_bits - 1 - hi_offset))
      119  +         quad = (FullQuadMask << lo_offset) & (FullQuadMask >> (quad_bits - 1 - hi_offset))
 118  120            e.append_quad(quad)
 119  121        else:
 120       -         e.append_quad((e.FullQuadMask << lo_offset) & e.FullQuadMask)
      122  +         e.append_quad((FullQuadMask << lo_offset) & FullQuadMask)
 121  123            e.append_run(Full, hi_quad_no - (lo_quad_no + 1))
 122       -         e.append_quad((e.FullQuadMask >> (e.quad_bits - 1 - hi_offset)) & e.FullQuadMask)
 123       -     if hi_quad_no < e.UnicodeQuadCount - 1: e.append_run(Empty, e.UnicodeQuadCount - (hi_quad_no + 1))
      124  +         e.append_quad((FullQuadMask >> (quad_bits - 1 - hi_offset)) & FullQuadMask)
      125  +     if hi_quad_no < UnicodeQuadCount - 1: e.append_run(Empty, UnicodeQuadCount - (hi_quad_no + 1))
 124  126        return e
 125  127
   …    …
 159  161                if this_run_type == Mixed: self.quad_no += remain
 160  162                n = remain
      163  +
      164  +
      165  + def uset_member(s, codepoint):
      166  +     quad_no = codepoint / quad_bits
      167  +     quad_val = 1 << (codepoint & mod_quad_bit_mask)
      168  +     it = Uset_Iterator(s)
      169  +     it.advance(quad_no)
      170  +     return (it.get_quad() & quad_val) != 0
 161  171
 162       - def complement (s):
 163       -     assert s.quad_count == s.UnicodeQuadCount
 164       -     iset = UCset(s.log2_quad_bits)
      172  + def uset_complement (s):
      173  +     assert s.quad_count == UnicodeQuadCount
      174  +     iset = UCset()
 165  175        it = Uset_Iterator(s)
 166       -     R = s.runs
 167       -     Q = s.quads
 168  176        while not it.at_end():
 169  177            (runtype, n) = it.current_run()
   …    …
 181  189
 182  190
 183       - def intersect(s1, s2):
 184       -     assert s1.quad_count == s1.UnicodeQuadCount
 185       -     assert s2.quad_count == s1.UnicodeQuadCount
      191  + def uset_intersection (s1, s2):
      192  +     assert s1.quad_count == UnicodeQuadCount
      193  +     assert s2.quad_count == UnicodeQuadCount
 186  194        iset = UCset()
 187  195        i1 = Uset_Iterator(s1)
   …    …
 216  224        return iset
 217  225
 218       - def union (s1, s2):
 219       -     assert s1.quad_count == s1.UnicodeQuadCount
 220       -     assert s2.quad_count == s1.UnicodeQuadCount
      226  + def uset_union (s1, s2):
      227  +     assert s1.quad_count == UnicodeQuadCount
      228  +     assert s2.quad_count == UnicodeQuadCount
 221  229        iset = UCset()
 222  230        i1 = Uset_Iterator(s1)
   …    …
 251  259        return iset
 252  260
 253       - def difference (s1, s2):
 254       -     assert s1.quad_count == s1.UnicodeQuadCount
 255       -     assert s2.quad_count == s1.UnicodeQuadCount
      261  + def uset_difference (s1, s2):
      262  +     assert s1.quad_count == UnicodeQuadCount
      263  +     assert s2.quad_count == UnicodeQuadCount
 256  264        iset = UCset()
 257  265        i1 = Uset_Iterator(s1)
   …    …
 286  294        return iset
 287  295
 288       - def symmetric_difference (s1, s2):
 289       -     assert s1.quad_count == s1.UnicodeQuadCount
 290       -     assert s2.quad_count == s1.UnicodeQuadCount
      296  + def uset_symmetric_difference (s1, s2):
      297  +     assert s1.quad_count == UnicodeQuadCount
      298  +     assert s2.quad_count == UnicodeQuadCount
 291  299        iset = UCset()
 292  300        i1 = Uset_Iterator(s1)
   …    …
 298  306            if s1_type == Empty and s2_type == Full or s1_type == Full and s2_type == Empty:
 299  307                iset.append_run(Full, n)
      308  +             i1.advance(n)
      309  +             i2.advance(n)
 300  310            elif s1_type == Full and s2_type == Full or s1_type == Empty and s2_type == Empty:
 301  311                iset.append_run(Empty, n)
      312  +             i1.advance(n)
      313  +             i2.advance(n)
 302  314            elif s1_type == Empty:
 303  315                for i in range(n):
Note: See TracChangeset for help on using the changeset viewer.