Changeset 2974


Ignore:
Timestamp:
Mar 27, 2013, 8:16:20 AM (4 years ago)
Author:
cameron
Message:

Charset compiler clean-up

Location:
proto/charsetcompiler
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/charset_compiler.py

    r2232 r2974  
    1   # -*- coding: utf-8 -*-
    2 1 #
    3 2 #  Character Class Compiler
     
    3231import sys, optparse
    3332import re, binascii, string
     33import EBCDIC
    3434
    3535import charset_def
     
    248248    return Var(bit_var(n))
    249249
    250 # Deprecated
    251 def make_2bit_test(var1, var2, bit_pattern):
    252   if bit_pattern == 0:
    253     return make_not(make_or(var1, var2))
    254   elif bit_pattern == 1:
    255     return make_and(make_not(var1), var2)
    256   elif bit_pattern == 2:
    257     return make_and(var1, make_not(var2))
    258   else: return make_and(var1, var2)
    259 
    260 # Deprecated
    261 def make_8bit_test(bit_pattern):
    262   return make_and(make_and(make_2bit_test(make_bitv(0), make_bitv(1), (bit_pattern >> 6) & 3),
    263                            make_2bit_test(make_bitv(2), make_bitv(3), (bit_pattern >> 4) & 3)),
    264                   make_and(make_2bit_test(make_bitv(4), make_bitv(5), (bit_pattern >> 2) & 3),
    265                            make_2bit_test(make_bitv(6), make_bitv(7), (bit_pattern) & 3)))
    266 
    267250
    268251def make_bit_test(pattern, bit_count):
     
    339322    return make_and(common, make_sel(make_bitv(8-diff_count), hi_test, lo_test))
    340323
    341 # Deprecated   
    342 def Inclusive_Range(N, n1, n2):  # require n2 >= n1
    343     if N == 0: return TrueLiteral()
    344     elif n1 >= 2**(N-1):
    345         return make_and(make_bitv(8-N), Inclusive_Range(N-1, n1 - 2**(N-1), n2 - 2**(N-1)))
    346     elif n2 < 2**(N-1):
    347         return make_and(make_not(make_bitv(8-N)), Inclusive_Range(N-1, n1, n2))
    348     else:
    349         n2_lo = n2 - 2**(N-1)
    350         lo_test = GE_Range(N-1, n1)
    351         hi_test = LE_Range(N-1, n2_lo)
    352         # special optimization?
    353         # if n2_lo + 1 == n1: return make_xor(make_bit_test(8-N), lo_test)
    354         return make_sel(make_bitv(8-N), lo_test, hi_test)
    355 
    356324BadCharSetItem = Exception()
    357325
     
    362330        if charset_item[1] == '-' and ord(charset_item[0]) <= ord(charset_item[2]):
    363331             return Make_Range(ord(charset_item[0]), ord(charset_item[2]))
    364              #  return Inclusive_Range(8, ord(charset_item[0]), ord(charset_item[2]))
    365332    print charset_item
    366333    raise BadCharSetItem
     
    501468
    502469
    503 def code_gen_for_transcode(transcode_tbl_h, transcode_tbl_l):
    504   cgo = CodeGenObject([bit_var(i) for i in range(0,8)])
    505   xor_tbl_l = [transcode_tbl_l[code] ^ code for code in range(256)]
    506   code_gen_for_transcode_8bit(cgo, "x16h", transcode_tbl_h)
    507   code_gen_for_transcode_8bit(cgo, "x16l", xor_tbl_l)
    508   return cgo.showcode()
    509 
    510 def code_gen_for_transcode_8bit(cgo, pfx, tbl):
    511   for bit in range(8):
    512     bit_mask = 256 >> bit
    513     in_run = False
    514     first_expr_found = False
    515     bit_expr = FalseLiteral()
    516     for code in range(256):
    517       if tbl[code] & bit_mask != 0:
    518         if not in_run:
    519           in_run = True
    520           run_start = code
    521       else:
    522         if in_run:
    523           if run_start == code-1:
    524             e1 = char_test_expr(chr(code - 1))
    525           else:
    526             e1 = Make_Range(run_start, code - 1)
    527           if first_expr_found:
    528             bit_expr = make_or(bit_expr, e1)
    529           else:
    530             first_expr_found = True
    531             bit_expr = e1
    532     if first_expr_found:
    533       cgo.add_assignment("%s[%i]" % (pfx, bit), expr2simd(cgo, bit_expr))
    534 
    535 # Work with tables from
    536 # /home/cameron/glibc-2.3.5/localedata/charmaps/
    537 
    538 charmap_line_RE = re.compile("<U([0-9A-F][0-9A-F])([0-9A-Z][0-9A-Z])>\s+/x([0-9a-f][0-9a-f])\s")
    539 
    540 def read_char_map(file):
    541   f = open(file)
    542   lines = f.readlines()
    543   matches = [charmap_line_RE.match(l) for l in lines]
    544   u16hi = [ord(binascii.unhexlify(m.group(1))) for m in matches if m]
    545   u16lo = [ord(binascii.unhexlify(m.group(2))) for m in matches if m]
    546   codes = [ord(binascii.unhexlify(m.group(3))) for m in matches if m]
    547   codes_OK = [c for c in range(256) if codes[c] == c]
    548   if len(codes_OK) != 256:
    549     print("Code map failure reading %s" % file)
    550   return (u16hi, u16lo)
    551 
    552 import codecs
    553 def ascii2ebcdic_chardeflist(defs):
    554         encoder = codecs.getencoder('cp037')
    555         return [xlate_chardef(d, encoder) for d in defs]
    556 
    557 def xlate_char_or_range(charset_item, encoder):
    558     if len(charset_item) == 1:
    559         return encoder(charset_item[0])
    560     elif len(charset_item) == 3:
    561         if charset_item[1] == '-' and ord(charset_item[0]) <= ord(charset_item[2]):
    562              return Make_Range(ord(charset_item[0]), ord(charset_item[2]))
    563              #  return Inclusive_Range(8, ord(charset_item[0]), ord(charset_item[2]))
    564     print charset_item
    565     raise BadCharSetItem
    566        
    567 def xlate_chardef(chardef, encoder):
    568   if isinstance(chardef, CharDef):
    569     return CharDef(chardef.name, encoder(chardef.items[0])[0], chardef.complemented)
    570   else:
    571     cdefs = []
    572     for item in chardef.items:
    573         if len(item) == 1: cdefs.append(encoder(item)[0])
    574         elif len(item) == 3:
    575           for v in range(ord(item[0]), ord(item[-1])+1):
    576             cdefs.append(encoder(chr(v))[0])
    577         else: raise BadCharSetItem
    578     return CharSetDef(chardef.name, cdefs, chardef.complemented)
    579 
    580 
    581470
    582471def main():
     
    598487                             help='pattern for generated temporaries; default: temp%i',
    599488                             )
     489    option_parser.add_option('-E', '--EBCDIC',
     490                             dest='use_EBCDIC',
     491                             action='store_true',
     492                             default=False,
     493                             help='generate definitions for EBCDIC input',
     494                             )
    600495    options, args = option_parser.parse_args(sys.argv[1:])
    601496
     
    606501            #define the characters in the list
    607502            defs = charset_input_parser.input_chardef(args[0])
    608                                                
    609             #print chardeflist2simd(defs)
    610             print chardeflist2py(defs)
    611         else:
    612             #print chardeflist2simd(DefinitionSet[args[1]])
    613             print chardeflist2py(DefinitionSet[args[1]])
    614     elif len(args) == 2 and args[1] == 'EBCDIC':
    615         defs = ascii2ebcdic_chardeflist(DefinitionSet[sys.argv[0]])
    616         print chardeflist2simd(defs)
     503        else: defs = DefinitionSet[args[1]]
     504        if options.use_EBCDIC:
     505            defs = EBCDIC.ascii2ebcdic_chardeflist(defs)                       
     506        print chardeflist2py(defs)
    617507    else:
    618508        option_parser.print_usage()
  • proto/charsetcompiler/inputs/PDF_escapes_mod_bits

    r2822 r2974  
    1   # PDF escape bit modificationspdf_mod_bit_0 = []
      1 # PDF escape bit modifications
      2 pdf_mod_bit_0 = []
    2 3 pdf_mod_bit_1 = [bfnrt]
    3 4 pdf_mod_bit_2 = [bfnrt]
Note: See TracChangeset for help on using the changeset viewer.