Ignore:
Timestamp:
Jul 3, 2012, 5:27:51 PM (7 years ago)
Author:
cameron
Message:

Identify illegal characters in character sets

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/transcode_compiler.py

--- proto/charsetcompiler/transcode_compiler.py (r2232)
+++ proto/charsetcompiler/transcode_compiler.py (r2233)
@@ -31,10 +31,15 @@
 def Legacy_8bit_To_UCS2_Table(charset_name):
         decoder = codecs.lookup(charset_name)
-        table = []     
+        table = []
+        illegal = []
         for char_val in range(0, 256):
-                (uch, ulen) = decoder.decode(chr(char_val))
-                if ulen !=1: raise Exception()
-                table.append(ord(uch))
-        return table
+                try:
+                        (uch, ulen) = decoder.decode(chr(char_val))
+                        if ulen !=1: raise UnicodeError()
+                        table.append(ord(uch))
+                except:
+                        illegal.append(char_val)
+                        table.append(0)
+        return (table, illegal)
 
 
@@ -75,10 +80,13 @@
 def WriteLegacy_8bit_CharDefs(charset_name, f):
         defs = "# %s to UTF-16 decoding equations\n" % charset_name
-        t = Legacy_8bit_To_UCS2_Table(charset_name)
+        (tbl, illegal) = Legacy_8bit_To_UCS2_Table(charset_name)
+        if len(illegal) > 0:
+                bad = [chr(x) for x in illegal]
+                defs += "illegal = %s\n" % string.join(bad, "").encode('string-escape')
         for bit in range(0, 8):
-                cs = UCS2_Table_To_u16hi_bit(t, bit)
+                cs = UCS2_Table_To_u16hi_bit(tbl, bit)
                 defs += "u16hi_bit_%i = [%s]\n" % (bit, string.join(cs, "").encode('string-escape'))
         for bit in range(0, 8):
-                cs = UCS2_Table_To_u16lo_bit(t, bit)
+                cs = UCS2_Table_To_u16lo_bit(tbl, bit)
                 defs += "x16lo_bit_%i = [%s]\n" % (bit, string.join(cs, "").encode('string-escape'))
         f.write(defs)
Note: See TracChangeset for help on using the changeset viewer.