Changeset 3943


Ignore:
Timestamp:
Aug 1, 2014, 10:52:13 AM (5 years ago)
Author:
cameron
Message:

Refactor to create CC_compiler object

Location:
proto/charsetcompiler
Files:
1 added
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/charset_compiler.py

    r3942 r3943  
    3838
    3939from bitwise_expr import *
     40from CC_compiler import *
    4041
    41 
    def bit_var(n):
        """Return the basis-bit variable name used for bit index n.

        Names are produced by formatting the entries of
        UTF_encoding.Encoding.basis_pattern.  With a single pattern the index
        is substituted directly.  For the UTF16 and UTF32 encodings the index
        is split across several patterns according to options.little_endian;
        UTF32 indices that fall outside the patterned bits are given
        "unused_bit" names.  Returns None when no case applies.
        """
        enc = UTF_encoding.Encoding
        patterns = enc.basis_pattern

        if len(patterns) == 1:
            return patterns[0] % n

        if enc.name == UTF_encoding.UTF16.name:
            if options.little_endian == True:
                return patterns[0] % (n - 8) if n >= 8 else patterns[1] % n
            return patterns[0] % n if n <= 7 else patterns[1] % (n - 8)

        if enc.name == UTF_encoding.UTF32.name:
            if options.little_endian == True:
                if n >= 21:
                    return "unused_bit%i" % (n - 21)
                if n >= 16:
                    return patterns[0] % (n - 16)
                if n >= 8:
                    return patterns[1] % (n - 8)
                if n < 8:
                    return patterns[2] % n
            else:
                if n <= 10:
                    return "unused_bit%i" % n
                if n <= 15:
                    return patterns[0] % (n - 8)
                if n <= 23:
                    return patterns[1] % (n - 16)
                if n > 23:
                    return patterns[2] % (n - 24)
    83 
    def make_bitv(n):
        """Return a Var naming the basis bit for logical bit n.

        With options.little_endian set the index is passed to bit_var
        unchanged; otherwise it is mirrored within the encoding width
        (UTF_encoding.Encoding.bits) first.
        """
        if options.little_endian == True:
            index = n
        else:
            index = (UTF_encoding.Encoding.bits - 1) - n
        return Var(bit_var(index))
    92        
    def make_bit_test(pattern, bit_count):
        """Build an expression testing the top bit_count bits against pattern.

        The bits of pattern are examined from bit (bit_count - 1) down to
        bit 0; each is paired with a basis bit taken from the high end of the
        encoding (make_bitv(Encoding.bits - 1 - i)), negated where the pattern
        bit is 0.  The resulting terms are AND-ed together pairwise, level by
        level.  Returns TrueLiteral() when bit_count is 0.
        """
        if bit_count == 0:
            return TrueLiteral()
        top_bit = UTF_encoding.Encoding.bits - 1
        bit_terms = []
        test_bit = 2**(bit_count - 1)
        for i in range(bit_count):
            if (pattern & test_bit) == 0:
                bit_terms.append(make_not(make_bitv(top_bit - i)))
            else:
                bit_terms.append(make_bitv(top_bit - i))
            test_bit >>= 1
        while len(bit_terms) > 1:
            # Floor division keeps the pair count an int on every Python
            # version; plain "/" yields a float under true division and
            # range() rejects it.
            new_terms = [make_and(bit_terms[2*i], bit_terms[2*i+1])
                         for i in range(len(bit_terms) // 2)]
            if len(bit_terms) % 2 == 1:
                # An odd term out is carried up to the next level unchanged.
                new_terms.append(bit_terms[-1])
            bit_terms = new_terms
        return bit_terms[0]
    110 
    def bit_pattern_expr(pattern, selected_bits):
        """Build an expression matching pattern on the bits in selected_bits.

        For every bit position up to the highest bit set in selected_bits, a
        term is produced: the basis bit (or its negation, where the pattern
        bit is 0) for selected positions, and TrueLiteral() for unselected
        ones.  The terms, ordered from the highest position down to bit 0,
        are AND-ed together pairwise, level by level.  Returns TrueLiteral()
        when selected_bits is 0.
        """
        if selected_bits == 0:
            return TrueLiteral()
        bit_terms = []
        bit_no = 0
        while selected_bits:
            test_bit = 1 << bit_no
            if selected_bits & test_bit:
                if (pattern & test_bit) == 0:
                    bit_terms.append(make_not(make_bitv(bit_no)))
                else:
                    bit_terms.append(make_bitv(bit_no))
            else:
                # Adding TrueLiteral() for nonselected bits is intended
                # to keep consistent grouping of variables in the next loop.
                bit_terms.append(TrueLiteral())
            selected_bits &= ~test_bit
            bit_no += 1
        # Terms were collected low bit first; the pairing below expects the
        # highest bit first.
        bit_terms.reverse()

        while len(bit_terms) > 1:
            # Floor division keeps the pair count an int on every Python
            # version; plain "/" yields a float under true division and
            # range() rejects it.
            new_terms = [make_and(bit_terms[2*i], bit_terms[2*i+1])
                         for i in range(len(bit_terms) // 2)]
            if len(bit_terms) % 2 == 1:
                new_terms.append(bit_terms[-1])
            bit_terms = new_terms

        return bit_terms[0]
    136    
    137 
    def char_test_expr(ch):
        """Return the expression matching exactly the character ch.

        Every bit selected by UTF_encoding.Encoding.mask is compared against
        the code of ch.
        """
        code = ord(ch)
        all_bits = UTF_encoding.Encoding.mask
        return bit_pattern_expr(code, all_bits)
    141 
    def GE_Range(N, n):
        """Build an expression true when the low N basis bits are >= n.

        Even widths whose top two bits of n are 00 or 11 are handled two bits
        at a time; every other case peels off one bit and recurses.  Returns
        TrueLiteral() for N == 0.
        """
        if N == 0:
            return TrueLiteral()

        if N % 2 == 0:
            top_pair = n >> (N - 2)
            if top_pair == 0:
                # >= 00xxxx: either top bit set suffices, else compare the rest.
                return make_or(make_or(make_bitv(N-1), make_bitv(N-2)),
                               GE_Range(N - 2, n))
            if top_pair == 3:
                # >= 11xxxx: both top bits must be set and the rest must be >=.
                return make_and(make_and(make_bitv(N-1), make_bitv(N-2)),
                                GE_Range(N - 2, n - (3 << (N-2))))

        if N >= 1:
            hi_bit = n & (1 << (N-1))
            lo_range = GE_Range(N-1, n - hi_bit)
            # With the high bit of n clear, a set bit in the target already
            # guarantees >=, otherwise the low-bit comparison decides.  With
            # it set, the target bit must be set and the low bits must be >=.
            if hi_bit == 0:
                combine = make_or
            else:
                combine = make_and
            return combine(make_bitv(N-1), lo_range)
    164 
    def LE_Range(N, n):
        """Build an expression true when the low N basis bits are <= n.

        Expressed as the negation of ">= n+1".  An all-ones n is answered
        with TrueLiteral() directly, because n+1 would not fit in N bits.
        """
        successor = n + 1
        if successor != 2 ** N:
            return make_not(GE_Range(N, successor))
        return TrueLiteral()
    174 
    class BadRange(Exception):
        """Raised by Make_Range for a reversed or over-wide range."""


    def Make_Range(n1, n2):  # require n2 >= n1
        """Build an expression matching code units in the range n1..n2.

        The bits above the highest differing bit of n1 and n2 must equal the
        common prefix; the highest differing bit then selects between an
        upper-bound test (bit set) and a lower-bound test (bit clear) on the
        remaining low bits.

        Raises BadRange when n2 < n1 or when the bounds differ in more bits
        than UTF_encoding.Encoding.bits.
        """
        diff_bits = n1 ^ n2
        diff_count = 0
        while diff_bits > 0:
            diff_count += 1
            diff_bits >>= 1
        if n2 < n1 or diff_count > UTF_encoding.Encoding.bits:
            # BadRange is an exception class, so it can be instantiated here;
            # the former module-level Exception() instance was not callable.
            raise BadRange()
        mask = 2**(diff_count) - 1
        common = bit_pattern_expr(n1 & ~mask, UTF_encoding.Encoding.mask ^ mask)
        if diff_count == 0:
            return common
        mask = 2**(diff_count-1) - 1
        lo_test = GE_Range(diff_count-1, n1 & mask)
        hi_test = LE_Range(diff_count-1, n2 & mask)

        return make_and(common, make_sel(make_bitv(diff_count-1), hi_test, lo_test))
    193 
    class BadCharSetItem(Exception):
        """Raised for a charset item that is neither 'c' nor a valid 'a-b'."""


    def char_or_range_expr(charset_item):
        """Return the expression for one charset item.

        A one-character item matches that character.  A three-character item
        of the form 'a-b' with a <= b matches the inclusive range.  Anything
        else raises BadCharSetItem.
        """
        if len(charset_item) == 1:
            return char_test_expr(charset_item[0])
        elif len(charset_item) == 3:
            if charset_item[1] == '-' and ord(charset_item[0]) <= ord(charset_item[2]):
                return Make_Range(ord(charset_item[0]), ord(charset_item[2]))
        # A fresh exception per failure, carrying the offending item.
        raise BadCharSetItem(charset_item)
    203 
    def charset_expr(chardef):
        """Return the expression matching the character class chardef.

        chardef.items is a list of single characters and 'a-b' ranges; the
        result is the OR of the per-item expressions, negated when
        chardef.complemented is set.  An empty item list gives FalseLiteral().

        Shortcut: when there are several items, all single characters, each
        exactly 2 above the previous one, the set is expressed as one range
        test AND-ed with a test on the lowest bit instead of a chain of ORs.
        """
        items = chardef.items
        if not items:
            return FalseLiteral()

        expr = None
        # Every item, including the first, must be a single code point before
        # ord() may be applied to it.
        if len(items) > 1 and all(len(item) == 1 for item in items):
            codes = [ord(item) for item in items]
            # Every adjacent pair, including the first, must be 2 apart.
            if all(b - a == 2 for a, b in zip(codes, codes[1:])):
                utf_temp = UTF_encoding.Encoding.mask - 1
                first_item = codes[0] & utf_temp
                last_item = codes[-1] | (UTF_encoding.Encoding.mask ^ utf_temp)
                # The widened range also covers the code points in between, so
                # the lowest bit must additionally match that of the listed
                # items for only the stride-2 members to be accepted.
                low_bit = make_bitv(0)
                if (codes[0] & 1) == 0:
                    low_bit = make_not(low_bit)
                # Make_Range is called with the integer bounds directly, which
                # avoids building a temporary 'a-b' string via chr().
                expr = make_and(Make_Range(first_item, last_item), low_bit)

        if expr is None:
            expr = char_or_range_expr(items[0])
            for item in items[1:]:
                expr = make_or(expr, char_or_range_expr(item))

        # Complementation applies to the shortcut form as well.
        if chardef.complemented:
            return make_not(expr)
        return expr
    234 
    235 #
    236 #
    237 #  Code Generation
    238 #
    class CodeGenObject:
        """Accumulates three-address assignments and reuses repeated expressions.

        Each distinct expression string is assigned to a variable once; later
        requests for the same string return the existing variable name.
        """

        def __init__(self, predeclared, typedecl='BitBlock '):
            """Set up an empty generator.

            predeclared -- names that already exist; each maps to itself so no
                           assignment is ever emitted for it.
            typedecl    -- text placed in front of every assigned variable.

            The temporary-name template is read from the module-level
            options.gensym_pattern.
            """
            self.gensym_template = options.gensym_pattern
            self.gensym_counter = 0
            self.generated_code = []
            self.common_expression_map = {}
            for sym in predeclared:
                self.common_expression_map[sym] = sym
            self.typedecl = typedecl

        def add_assignment(self, varname, expr):
            """Record that varname holds expr and emit the assignment line."""
            self.common_expression_map[expr] = varname
            self.generated_code.append('\t%s%s = %s\n' % (self.typedecl, varname, expr))

        def expr_string_to_variable(self, expr_string):
            """Return the variable holding expr_string, creating one if needed."""
            # "in" replaces dict.has_key(), which no longer exists in Python 3.
            if expr_string in self.common_expression_map:
                return self.common_expression_map[expr_string]
            self.gensym_counter += 1
            sym = self.gensym_template % self.gensym_counter
            self.add_assignment(sym, expr_string)
            return sym

        def showcode(self):
            """Return all generated assignment lines as one string."""
            return ''.join(self.generated_code)
    264 
    def expr2simd(genobj, expr):
        """Render a Boolean expression as the text of one SIMD operation.

        Sub-expressions are first bound to variables through genobj, so the
        returned string only ever refers to variable names.  An AND with a
        negated operand is emitted as a single simd_andc.  Returns None for
        an expression type that is not recognised.
        """
        def as_variable(subexpr):
            # Translate the sub-expression, then bind it to a variable.
            return genobj.expr_string_to_variable(expr2simd(genobj, subexpr))

        if isinstance(expr, TrueLiteral):
            return 'simd_const_1(1)'
        if isinstance(expr, FalseLiteral):
            return 'simd_const_1(0)'
        if isinstance(expr, Var):
            return expr.varname
        if isinstance(expr, Not):
            return 'simd_andc(simd_const_1(1), %s)' % as_variable(expr.operand)
        if isinstance(expr, Or):
            left = as_variable(expr.operand1)
            right = as_variable(expr.operand2)
            return 'simd_or(%s, %s)' % (left, right)
        if isinstance(expr, Xor):
            left = as_variable(expr.operand1)
            right = as_variable(expr.operand2)
            return 'simd_xor(%s, %s)' % (left, right)
        if isinstance(expr, And):
            if isinstance(expr.operand1, Not):
                negated = as_variable(expr.operand1.operand)
                kept = as_variable(expr.operand2)
                return 'simd_andc(%s, %s)' % (kept, negated)
            if isinstance(expr.operand2, Not):
                kept = as_variable(expr.operand1)
                negated = as_variable(expr.operand2.operand)
                return 'simd_andc(%s, %s)' % (kept, negated)
            left = as_variable(expr.operand1)
            right = as_variable(expr.operand2)
            return 'simd_and(%s, %s)' % (left, right)
        if isinstance(expr, Sel):
            chooser = as_variable(expr.sel)
            when_true = as_variable(expr.true_branch)
            when_false = as_variable(expr.false_branch)
            return 'simd_if(%s, %s, %s)' % (chooser, when_true, when_false)
    301 
    def chardef2simd(genobj, chardef):
        """Emit SIMD code assigning chardef's expression to chardef.name."""
        target = chardef.name
        simd_text = expr2simd(genobj, charset_expr(chardef))
        genobj.add_assignment(target, simd_text)
    304 
    def chardeflist2simd(chardeflist):
        """Return the SIMD code text for every definition in chardeflist.

        The basis-bit names for all bits of the encoding are predeclared, so
        they are used directly rather than assigned.
        """
        basis_names = []
        for bit_index in range(0, UTF_encoding.Encoding.bits):
            basis_names.append(bit_var(bit_index))
        generator = CodeGenObject(basis_names)
        for definition in chardeflist:
            chardef2simd(generator, definition)
        return generator.showcode()
    310 
    def expr2py(genobj, expr):
        """Render a Boolean expression as the text of one Python bit operation.

        Sub-expressions are first bound to variables through genobj, so the
        returned string only ever refers to variable names.  An AND with a
        negated operand is emitted as a single "&~".  Returns None for an
        expression type that is not recognised.
        """
        def as_variable(subexpr):
            # Translate the sub-expression, then bind it to a variable.
            return genobj.expr_string_to_variable(expr2py(genobj, subexpr))

        if isinstance(expr, TrueLiteral):
            return '-1'
        if isinstance(expr, FalseLiteral):
            return '0'
        if isinstance(expr, Var):
            return expr.varname
        if isinstance(expr, Not):
            return '(~%s)' % as_variable(expr.operand)
        if isinstance(expr, Or):
            left = as_variable(expr.operand1)
            right = as_variable(expr.operand2)
            return '(%s | %s)' % (left, right)
        if isinstance(expr, Xor):
            left = as_variable(expr.operand1)
            right = as_variable(expr.operand2)
            return '(%s ^ %s)' % (left, right)
        if isinstance(expr, And):
            if isinstance(expr.operand1, Not):
                negated = as_variable(expr.operand1.operand)
                kept = as_variable(expr.operand2)
                return '(%s &~ %s)' % (kept, negated)
            if isinstance(expr.operand2, Not):
                kept = as_variable(expr.operand1)
                negated = as_variable(expr.operand2.operand)
                return '(%s &~ %s)' % (kept, negated)
            left = as_variable(expr.operand1)
            right = as_variable(expr.operand2)
            return '(%s & %s)' % (left, right)
        if isinstance(expr, Sel):
            chooser = as_variable(expr.sel)
            when_true = as_variable(expr.true_branch)
            when_false = as_variable(expr.false_branch)
            return '((%s & %s)|(~(%s) & %s))' % (chooser, when_true, chooser, when_false)
    347 
    def chardef2py(genobj, chardef):
        """Emit Python code assigning chardef's expression to chardef.name."""
        target = chardef.name
        py_text = expr2py(genobj, charset_expr(chardef))
        genobj.add_assignment(target, py_text)
    350    
    def py_chardefmap(chardeflist):
        """Return the source text of a dict literal mapping names to variables.

        Each definition d contributes an entry "'<d.name>' : <d.name>";
        entries are separated by a comma, a newline and a tab.
        """
        defs = ["'%s' : %s" % (d.name, d.name) for d in chardeflist]
        # str.join works on every Python version; string.join() was removed
        # in Python 3.
        return '{%s}' % ',\n\t'.join(defs)
    354 
    def chardeflist2py(chardeflist):
        """Return the Python code text for every definition in chardeflist.

        The basis-bit names for all bits of the encoding are predeclared, and
        assignments carry no type prefix.
        """
        basis_names = []
        for bit_index in range(0, UTF_encoding.Encoding.bits):
            basis_names.append(bit_var(bit_index))
        generator = CodeGenObject(basis_names, '')
        for definition in chardeflist:
            chardef2py(generator, definition)
        return generator.showcode()
    36042
    36143def main():   
     
    469151        if options.use_EBCDIC:
    470152            defs = EBCDIC.ascii2ebcdic_chardeflist(defs)
    471         stmts = chardeflist2py(defs)
     153        cgo = CC_compiler(UTF_encoding.Encoding, options.gensym_pattern, options.little_endian)
     154        stmts = cgo.chardeflist2py(defs)
    472155        if options.Pablo_skeleton or options.test_skeleton:
    473156          b = string.split(options.basis_pattern, ".")
Note: See TracChangeset for help on using the changeset viewer.