Changeset 3943 for proto/charsetcompiler
 Timestamp:
 Aug 1, 2014, 10:52:13 AM (5 years ago)
 Location:
 proto/charsetcompiler
 Files:

 1 added
 1 edited
Legend:
 Unmodified
 Added
 Removed

proto/charsetcompiler/charset_compiler.py
r3942 r3943
   38    38
   39    39  from bitwise_expr import *
         40  from CC_compiler import *
   40    41
   41
   42        def bit_var(n):
   43
   44            # return 'bit[%i]' % n
   45
   46            global options
   47            global Encoding
   48
   49            if len(UTF_encoding.Encoding.basis_pattern) == 1:
   50                return UTF_encoding.Encoding.basis_pattern[0] % n
   51
   52            if UTF_encoding.Encoding.name == UTF_encoding.UTF16.name:
   53                if options.little_endian == True:
   54                    if n >= 8:
   55                        return UTF_encoding.Encoding.basis_pattern[0] % (n - 8)
   56                    else:
   57                        return UTF_encoding.Encoding.basis_pattern[1] % n
   58                else:
   59                    if n <= 7:
   60                        return UTF_encoding.Encoding.basis_pattern[0] % n
   61                    else:
   62                        return UTF_encoding.Encoding.basis_pattern[1] % (n - 8)
   63
   64            if UTF_encoding.Encoding.name == UTF_encoding.UTF32.name:
   65                if options.little_endian == True:
   66                    if n >= 21:
   67                        return "unused_bit%i" % (n - 21)
   68                    elif n < 21 and n >= 16:
   69                        return UTF_encoding.Encoding.basis_pattern[0] % (n - 16)
   70                    elif n < 16 and n >= 8:
   71                        return UTF_encoding.Encoding.basis_pattern[1] % (n - 8)
   72                    elif n < 8:
   73                        return UTF_encoding.Encoding.basis_pattern[2] % n
   74                else:
   75                    if n <= 10:
   76                        return "unused_bit%i" % n
   77                    elif n > 10 and n <= 15:
   78                        return UTF_encoding.Encoding.basis_pattern[0] % (n - 8)
   79                    elif n > 15 and n <= 23:
   80                        return UTF_encoding.Encoding.basis_pattern[1] % (n - 16)
   81                    elif n > 23:
   82                        return UTF_encoding.Encoding.basis_pattern[2] % (n - 24)
   83
   84        def make_bitv(n):
   85
   86            global options
   87
   88            if options.little_endian == True:
   89                return Var(bit_var(n))
   90            else:
   91                return Var(bit_var((UTF_encoding.Encoding.bits - 1) -n))
   92
   93        def make_bit_test(pattern, bit_count):
   94            if bit_count == 0: return TrueLiteral()
   95            bit_terms = []
   96            test_bit = 2**(bit_count - 1)
   97            for i in range(0, bit_count):
   98                if (pattern & test_bit) == 0:
   99                    bit_terms.append(make_not(make_bitv((UTF_encoding.Encoding.bits - 1)-i)))
  100                else: bit_terms.append(make_bitv((UTF_encoding.Encoding.bits - 1)-i))
  101                test_bit >>= 1
  102            while len(bit_terms) > 1:
  103                new_terms = []
  104                for i in range(0, len(bit_terms)/ 2):
  105                    new_terms.append(make_and(bit_terms[2*i], bit_terms[2*i+1]))
  106                if len(bit_terms) % 2 == 1:
  107                    new_terms.append(bit_terms[-1])
  108                bit_terms = new_terms
  109            return bit_terms[0]
  110
  111        def bit_pattern_expr(pattern, selected_bits):
  112            if selected_bits == 0: return TrueLiteral()
  113            bit_terms = []
  114            bit_no = 0
  115            while selected_bits:
  116                test_bit = 1 << bit_no
  117                if selected_bits & test_bit:
  118                    if (pattern & test_bit) == 0:
  119                        bit_terms = [make_not(make_bitv(bit_no))] + bit_terms
  120                    else: bit_terms = [make_bitv(bit_no)] + bit_terms
  121                else: bit_terms = [TrueLiteral()] + bit_terms
  122                # Appending TrueLiteral() for non-selected bits is intended
  123                # to keep consistent grouping of variables in the next loop.
  124                selected_bits &= ~test_bit
  125                bit_no += 1
  126
  127            while len(bit_terms) > 1:
  128                new_terms = []
  129                for i in range(0, len(bit_terms)/ 2):
  130                    new_terms.append(make_and(bit_terms[2*i], bit_terms[2*i+1]))
  131                if len(bit_terms) % 2 == 1:
  132                    new_terms.append(bit_terms[-1])
  133                bit_terms = new_terms
  134
  135            return bit_terms[0]
  136
  137
  138        def char_test_expr(ch):
  139            #return make_bit_test(ord(ch), 8)
  140            return bit_pattern_expr(ord(ch), UTF_encoding.Encoding.mask)
  141
  142        def GE_Range(N, n):
  143
  144            if N == 0: return TrueLiteral()
  145            elif N % 2 == 0 and (n >> (N - 2)) == 0:
  146                return make_or(make_or(make_bitv(N-1), make_bitv(N-2)),
  147                               GE_Range(N - 2, n))
  148            elif N % 2 == 0 and (n >> (N - 2)) == 3: # >= 11xxxx
  149                return make_and(make_and(make_bitv(N-1), make_bitv(N-2)),
  150                                GE_Range(N - 2, n - (3 << (N-2))))
  151            elif N >= 1:
  152                hi_bit = n & (1 << (N-1))
  153                lo_bits = n - hi_bit
  154                lo_range = GE_Range(N-1, lo_bits)
  155                if hi_bit == 0:
  156                    # If the hi_bit of n is not set, then whenever the corresponding bit
  157                    # is set in the target, the target will certainly be >=. Otherwise,
  158                    # the value of GE_range(N-1, lo_bits) is required.
  159                    return make_or(make_bitv(N-1), lo_range)
  160                else:
  161                    # If the hi_bit of n is set, then the corresponding bit must be set
  162                    # in the target for >= and GE_range(N-1, lo_bits) must also be true.
  163                    return make_and(make_bitv(N-1), lo_range)
  164
  165        def LE_Range(N, n):
  166            # If an N-bit pattern is all ones, then it is always
  167            # true that any n-bit value is LE this pattern.
  168            # Handling this as a special case avoids an overflow
  169            # issue with n+1 requiring more than N bits.
  170            if n+1 == 2 ** N:
  171                return TrueLiteral()
  172            else:
  173                return make_not(GE_Range(N, n+1))
  174
  175        BadRange = Exception()
  176
  177        def Make_Range(n1, n2): # require n2 >= n1
  178            diff_bits = n1 ^ n2
  179            diff_count = 0
  180            while diff_bits > 0:
  181                diff_count += 1
  182                diff_bits >>= 1
  183            if n2 < n1 or diff_count > UTF_encoding.Encoding.bits: raise BadRange()
  184            mask = 2**(diff_count) - 1
  185            #common = make_bit_test(n1 >> diff_count, 8 - diff_count)
  186            common = bit_pattern_expr(n1 & ~mask, UTF_encoding.Encoding.mask^mask)
  187            if diff_count == 0: return common
  188            mask = 2**(diff_count-1) - 1
  189            lo_test = GE_Range(diff_count-1, n1 & mask)
  190            hi_test = LE_Range(diff_count-1, n2 & mask)
  191
  192            return make_and(common, make_sel(make_bitv(diff_count-1), hi_test, lo_test))
  193
  194        BadCharSetItem = Exception()
  195
  196        def char_or_range_expr(charset_item):
  197            if len(charset_item) == 1:
  198                return char_test_expr(charset_item[0])
  199            elif len(charset_item) == 3:
  200                if charset_item[1] == '-' and ord(charset_item[0]) <= ord(charset_item[2]):
  201                    return Make_Range(ord(charset_item[0]), ord(charset_item[2]))
  202            raise BadCharSetItem
  203
  204        def charset_expr(chardef):
  205            if chardef.items == []: return FalseLiteral()
  206            if len(chardef.items) > 1:
  207                combine = True
  208                #If all of the charset items are single codepoints
  209                #such that X0 == Y0, X1 == Y1 etc.
  210                for i in range(1, len(chardef.items)):
  211                    if len(chardef.items[i]) == 3:
  212                        combine = False
  213                        break
  214                if combine == True:
  215                    #If charset items are all of the form X1 = X0 + 2.
  216                    for i in range(1 , len(chardef.items) - 1):
  217                        curr_item = chardef.items[i]
  218                        next_item = chardef.items[i+1]
  219                        if ord(curr_item) != ord(next_item) - 2:
  220                            combine = False
  221                            break
  222                if combine == True:
  223                    first_item = ord(chardef.items[0])
  224                    last_item = ord(chardef.items[len(chardef.items)-1])
  225                    utf_temp = UTF_encoding.Encoding.mask - 1
  226                    first_item &= utf_temp
  227                    last_item |= (UTF_encoding.Encoding.mask ^ utf_temp)
  228                    return char_or_range_expr(chr(first_item) + '-' + chr(last_item))
  229            e1 = char_or_range_expr(chardef.items[0])
  230            for i in range(1, len(chardef.items)):
  231                e1 = make_or(e1, char_or_range_expr(chardef.items[i]))
  232            if chardef.complemented: return make_not(e1)
  233            else: return e1
  234
  235        #
  236        #
  237        # Code Generation
  238        #
  239        class CodeGenObject:
  240            def __init__(self, predeclared, typedecl='BitBlock '):
  241                self.gensym_template = options.gensym_pattern
  242                self.gensym_counter = 0
  243                self.generated_code = []
  244                self.common_expression_map = {}
  245                for sym in predeclared: self.common_expression_map[sym] = sym
  246                self.typedecl = typedecl
  247            def add_assignment(self, varname, expr):
  248                self.common_expression_map[expr] = varname
  249                #self.generated_code.append('%s%s = %s;\n' % (self.typedecl, varname, expr))
  250                self.generated_code.append('\t%s%s = %s\n' % (self.typedecl, varname, expr))
  251            def expr_string_to_variable(self, expr_string):
  252                if self.common_expression_map.has_key(expr_string):
  253                    return self.common_expression_map[expr_string]
  254                else:
  255                    self.gensym_counter += 1
  256                    sym = self.gensym_template % self.gensym_counter
  257                    self.add_assignment(sym, expr_string)
  258                    return sym
  259
  260            def showcode(self):
  261                s = ''
  262                for stmt in self.generated_code: s += stmt
  263                return s
  264
  265        def expr2simd(genobj, expr):
  266            """Translate a Boolean expression into three-address Altivec code
  267            using code generator object genobj.
  268            """
  269            if isinstance(expr, TrueLiteral): return 'simd_const_1(1)'
  270            elif isinstance(expr, FalseLiteral): return 'simd_const_1(0)'
  271            elif isinstance(expr, Var): return expr.varname
  272            elif isinstance(expr, Not):
  273                e = genobj.expr_string_to_variable(expr2simd(genobj, expr.operand))
  274                return 'simd_andc(simd_const_1(1), %s)' % (e)
  275            elif isinstance(expr, Or):
  276                e1 = genobj.expr_string_to_variable(expr2simd(genobj, expr.operand1))
  277                e2 = genobj.expr_string_to_variable(expr2simd(genobj, expr.operand2))
  278                return 'simd_or(%s, %s)' % (e1, e2)
  279            elif isinstance(expr, Xor):
  280                e1 = genobj.expr_string_to_variable(expr2simd(genobj, expr.operand1))
  281                e2 = genobj.expr_string_to_variable(expr2simd(genobj, expr.operand2))
  282                return 'simd_xor(%s, %s)' % (e1, e2)
  283            elif isinstance(expr, And):
  284                if isinstance(expr.operand1, Not):
  285                    e1 = genobj.expr_string_to_variable(expr2simd(genobj, expr.operand1.operand))
  286                    e2 = genobj.expr_string_to_variable(expr2simd(genobj, expr.operand2))
  287                    return 'simd_andc(%s, %s)' % (e2, e1)
  288                elif isinstance(expr.operand2, Not):
  289                    e1 = genobj.expr_string_to_variable(expr2simd(genobj, expr.operand1))
  290                    e2 = genobj.expr_string_to_variable(expr2simd(genobj, expr.operand2.operand))
  291                    return 'simd_andc(%s, %s)' % (e1, e2)
  292                else:
  293                    e1 = genobj.expr_string_to_variable(expr2simd(genobj, expr.operand1))
  294                    e2 = genobj.expr_string_to_variable(expr2simd(genobj, expr.operand2))
  295                    return 'simd_and(%s, %s)' % (e1, e2)
  296            elif isinstance(expr, Sel):
  297                sel = genobj.expr_string_to_variable(expr2simd(genobj, expr.sel))
  298                e1 = genobj.expr_string_to_variable(expr2simd(genobj, expr.true_branch))
  299                e2 = genobj.expr_string_to_variable(expr2simd(genobj, expr.false_branch))
  300                return 'simd_if(%s, %s, %s)' %(sel, e1, e2)
  301
  302        def chardef2simd(genobj, chardef):
  303            genobj.add_assignment(chardef.name, expr2simd(genobj, charset_expr(chardef)))
  304
  305        def chardeflist2simd(chardeflist):
  306            cgo = CodeGenObject([bit_var(i) for i in range(0, UTF_encoding.Encoding.bits)])
  307            for d in chardeflist:
  308                chardef2simd(cgo, d)
  309            return cgo.showcode()
  310
  311        def expr2py(genobj, expr):
  312            """Translate a Boolean expression into three-address python code
  313            using code generator object genobj.
  314            """
  315            if isinstance(expr, TrueLiteral): return '-1'
  316            elif isinstance(expr, FalseLiteral): return '0'
  317            elif isinstance(expr, Var): return expr.varname
  318            elif isinstance(expr, Not):
  319                e = genobj.expr_string_to_variable(expr2py(genobj, expr.operand))
  320                return '(~%s)' % (e)
  321            elif isinstance(expr, Or):
  322                e1 = genobj.expr_string_to_variable(expr2py(genobj, expr.operand1))
  323                e2 = genobj.expr_string_to_variable(expr2py(genobj, expr.operand2))
  324                return '(%s | %s)' % (e1, e2)
  325            elif isinstance(expr, Xor):
  326                e1 = genobj.expr_string_to_variable(expr2py(genobj, expr.operand1))
  327                e2 = genobj.expr_string_to_variable(expr2py(genobj, expr.operand2))
  328                return '(%s ^ %s)' % (e1, e2)
  329            elif isinstance(expr, And):
  330                if isinstance(expr.operand1, Not):
  331                    e1 = genobj.expr_string_to_variable(expr2py(genobj, expr.operand1.operand))
  332                    e2 = genobj.expr_string_to_variable(expr2py(genobj, expr.operand2))
  333                    return '(%s &~ %s)' % (e2, e1)
  334                elif isinstance(expr.operand2, Not):
  335                    e1 = genobj.expr_string_to_variable(expr2py(genobj, expr.operand1))
  336                    e2 = genobj.expr_string_to_variable(expr2py(genobj, expr.operand2.operand))
  337                    return '(%s &~ %s)' % (e1, e2)
  338                else:
  339                    e1 = genobj.expr_string_to_variable(expr2py(genobj, expr.operand1))
  340                    e2 = genobj.expr_string_to_variable(expr2py(genobj, expr.operand2))
  341                    return '(%s & %s)' % (e1, e2)
  342            elif isinstance(expr, Sel):
  343                sel = genobj.expr_string_to_variable(expr2py(genobj, expr.sel))
  344                e1 = genobj.expr_string_to_variable(expr2py(genobj, expr.true_branch))
  345                e2 = genobj.expr_string_to_variable(expr2py(genobj, expr.false_branch))
  346                return '((%s & %s)|(~(%s) & %s))' %(sel, e1, sel, e2)
  347
  348        def chardef2py(genobj, chardef):
  349            genobj.add_assignment(chardef.name, expr2py(genobj, charset_expr(chardef)))
  350
  351        def py_chardefmap(chardeflist):
  352            defs = ["'%s' : %s" % (d.name,d.name) for d in chardeflist]
  353            return '{%s}' % string.join(defs, ',\n\t')
  354
  355        def chardeflist2py(chardeflist):
  356            cgo = CodeGenObject([bit_var(i) for i in range(0, UTF_encoding.Encoding.bits)],'')
  357            for d in chardeflist:
  358                chardef2py(cgo, d)
  359            return cgo.showcode()# + " return "+ py_chardefmap(chardeflist) + "\n"
  360    42
  361    43  def main():
    …     …
  469   151      if options.use_EBCDIC:
  470   152          defs = EBCDIC.ascii2ebcdic_chardeflist(defs)
  471            stmts = chardeflist2py(defs)
        153      cgo = CC_compiler(UTF_encoding.Encoding, options.gensym_pattern, options.little_endian)
        154      stmts = cgo.chardeflist2py(defs)
  472   155      if options.Pablo_skeleton or options.test_skeleton:
  473   156          b = string.split(options.basis_pattern, ".")
Note: See TracChangeset
for help on using the changeset viewer.