Changeset 4176
 Timestamp:
 Sep 16, 2014, 4:36:24 PM (5 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

proto/charsetcompiler/UCD/unicode_set.py
r4146 r4176 19 19 # (c) Explicitly listing all the quads of Mixed type. 20 20 # 21 UnicodeQuadCount = 0x110000 >> 6 # 2**6 codepoints per quad22 21 23 22 Empty = 0 … … 25 24 Mixed = 1 26 25 27 FullQuad = (1<<64)  1 26 default_log_2_quad_bits = 6 28 27 29 28 class UCset: 30 def __init__(self ):29 def __init__(self, log2_quad_bits = default_log_2_quad_bits): 31 30 self.runs = [] 32 31 self.quads = [] 33 32 self.quad_count = 0 33 self.run_bytes = 2 34 self.log2_quad_bits = log2_quad_bits 35 self.quad_bits = 1 << log2_quad_bits 36 self.mod_quad_bit_mask = self.quad_bits  1 37 self.UnicodeQuadCount = 0x110000 / self.quad_bits # 2**log2_quad_bits codepoints per quad 38 self.FullQuadMask = (1<<(self.quad_bits))  1 39 34 40 # internal methods 35 41 def append_run(self, runtype, runlength): … … 47 53 if q == 0: 48 54 self.append_run(Empty, 1) 49 elif q & FullQuad == FullQuad:55 elif q & self.FullQuadMask == self.FullQuadMask: 50 56 self.append_run(Full, 1) 51 57 else: … … 55 61 # printing 56 62 def showC(self, name, indent = 8, entries_per_line = 4): 63 hex_specifier = "%%#0%ix" % (self.quad_bits/4 + 2) 57 64 runtype = {1:"Full", 0:"Empty", 1: "Mixed"} 58 65 setrep = (" " * indent) + ("%s.runs = {" % name) … … 67 74 if len(self.quads) >= entries_per_line: setrep += "\n" + (" " * (indent+1)) 68 75 if self.quads != []: 69 setrep += "%#018x"% self.quads[0]76 setrep += hex_specifier % self.quads[0] 70 77 for i in range(1, len(self.quads)): 71 78 setrep += ', ' 72 79 if i % entries_per_line == 0: setrep += "\n" + (" " * (indent+1)) 73 setrep += "%#018x"% (self.quads[i])80 setrep += hex_specifier % (self.quads[i]) 74 81 setrep += '};\n' 75 return setrep 82 return setrep 83 84 def bytes(self): 85 return (len(self.runs) * self.run_bytes) + (len(self.quads) * self.quad_bits/8) 76 86 77 87 … … 87 97 # Set Operations 88 98 # 89 def empty_set( ):90 e = UCset( )99 def empty_set(log2_quad_bits = default_log_2_quad_bits): 100 e = UCset(log2_quad_bits) 91 101 e.runs = [(Empty, 
UnicodeQuadCount)] 92 102 e.quads = [] … … 94 104 return e 95 105 96 def singleton_set(codepoint ):97 e = UCset( )98 quad_no = codepoint >> 699 quad_val = 1 << (codepoint & 0x3F)106 def singleton_set(codepoint, log2_quad_bits = default_log_2_quad_bits): 107 e = UCset(log2_quad_bits) 108 quad_no = codepoint >> log2_quad_bits 109 quad_val = 1 << (codepoint & e.mod_quad_bit_mask) 100 110 if quad_no > 0: e.append_run(Empty, quad_no) 101 111 e.append_run(Mixed, 1) 102 112 e.quads = [quad_val] 103 if quad_no < UnicodeQuadCount  1: e.append_run(Empty,UnicodeQuadCount  (quad_no + 1))104 e.quad_count = UnicodeQuadCount113 if quad_no < e.UnicodeQuadCount  1: e.append_run(Empty, e.UnicodeQuadCount  (quad_no + 1)) 114 e.quad_count = e.UnicodeQuadCount 105 115 return e 106 116 107 def make_range_set(lo_codepoint, hi_codepoint ):108 e = UCset( )109 lo_quad_no = lo_codepoint >> 6110 hi_quad_no = hi_codepoint >> 6111 lo_offset = lo_codepoint & 0x3F112 hi_offset = hi_codepoint & 0x3F117 def make_range_set(lo_codepoint, hi_codepoint, log2_quad_bits = default_log_2_quad_bits): 118 e = UCset(log2_quad_bits) 119 lo_quad_no = lo_codepoint >> e.log2_quad_bits 120 hi_quad_no = hi_codepoint >> e.log2_quad_bits 121 lo_offset = lo_codepoint & e.mod_quad_bit_mask 122 hi_offset = hi_codepoint & e.mod_quad_bit_mask 113 123 if lo_quad_no > 0: e.append_run(Empty, lo_quad_no) 114 124 if lo_quad_no == hi_quad_no: 115 quad = ( FullQuad << lo_offset) & (FullQuad >> (63 hi_offset))125 quad = (e.FullQuadMask << lo_offset) & (e.FullQuadMask >> (e.quad_bits  1  hi_offset)) 116 126 e.append_quad(quad) 117 127 else: 118 e.append_quad(( FullQuad << lo_offset) & FullQuad)128 e.append_quad((e.FullQuadMask << lo_offset) & e.FullQuadMask) 119 129 e.append_run(Full, hi_quad_no  (lo_quad_no + 1)) 120 e.append_quad(( FullQuad >> (63  hi_offset)) & FullQuad)121 if hi_quad_no < UnicodeQuadCount  1: e.append_run(Empty,UnicodeQuadCount  (hi_quad_no + 1))130 e.append_quad((e.FullQuadMask >> (e.quad_bits  1  hi_offset)) 
& e.FullQuadMask) 131 if hi_quad_no < e.UnicodeQuadCount  1: e.append_run(Empty, e.UnicodeQuadCount  (hi_quad_no + 1)) 122 132 return e 123 133 124 134 125 135 def complement (s): 126 assert s.quad_count == UnicodeQuadCount127 iset = UCset( )136 assert s.quad_count == s.UnicodeQuadCount 137 iset = UCset(s.log2_quad_bits) 128 138 R = s.runs 129 139 Q = s.quads … … 135 145 iset.append_run(Empty, n) 136 146 else: 137 iset.append_mixed_run(n, [ FullQuad^ q for q in Q[0:n]])147 iset.append_mixed_run(n, [s.FullQuadMask ^ q for q in Q[0:n]]) 138 148 Q = Q[n:] 139 149 R = advance_run_list(R, n) … … 141 151 142 152 def intersect (s1, s2): 143 assert s1.quad_count == UnicodeQuadCount144 assert s2.quad_count == UnicodeQuadCount153 assert s1.quad_count == s1.UnicodeQuadCount 154 assert s2.quad_count == s1.UnicodeQuadCount 145 155 iset = UCset() 146 156 r1 = s1.runs … … 172 182 173 183 def union (s1, s2): 174 assert s1.quad_count == UnicodeQuadCount175 assert s2.quad_count == UnicodeQuadCount184 assert s1.quad_count == s1.UnicodeQuadCount 185 assert s2.quad_count == s1.UnicodeQuadCount 176 186 iset = UCset() 177 187 r1 = s1.runs … … 203 213 204 214 def difference (s1, s2): 205 assert s1.quad_count == UnicodeQuadCount206 assert s2.quad_count == UnicodeQuadCount215 assert s1.quad_count == s1.UnicodeQuadCount 216 assert s2.quad_count == s1.UnicodeQuadCount 207 217 iset = UCset() 208 218 r1 = s1.runs … … 222 232 q1 = q1[n:] 223 233 elif s1_type == Full: 224 iset.append_mixed_run(n, [ FullQuad^ q for q in q2[0:n]])234 iset.append_mixed_run(n, [s1.FullQuadMask ^ q for q in q2[0:n]]) 225 235 q2 = q2[n:] 226 236 else: # both s1 and s2 have mixed blocks; form blockbyblock difference … … 235 245 236 246 def symmetric_difference (s1, s2): 237 assert s1.quad_count == UnicodeQuadCount238 assert s2.quad_count == UnicodeQuadCount247 assert s1.quad_count == s1.UnicodeQuadCount 248 assert s2.quad_count == s1.UnicodeQuadCount 239 249 iset = UCset() 240 250 r1 = s1.runs … … 257 267 q1 = q1[n:] 
258 268 elif s1_type == Full: 259 iset.append_mixed_run(n, [ FullQuad^ q for q in q2[0:n]])269 iset.append_mixed_run(n, [s1.FullQuadMask ^ q for q in q2[0:n]]) 260 270 q2 = q2[n:] 261 271 elif s2_type == Full: 262 iset.append_mixed_run(n, [ FullQuad^ q for q in q1[0:n]])272 iset.append_mixed_run(n, [s1.FullQuadMask ^ q for q in q1[0:n]]) 263 273 q1 = q1[n:] 264 274 else: # both s1 and s2 have mixed blocks; form block-by-block symmetric difference
Note: See TracChangeset
for help on using the changeset viewer.