Changeset 4370


Ignore:
Timestamp:
Dec 29, 2014, 9:27:35 AM (4 years ago)
Author:
cameron
Message:

Factor out if-hierarchy support

Location:
proto/charsetcompiler
Files:
1 added
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/unicode_category_compiler.py

    r4355 r4370  
    1313from charset_def import *
    1414from UCD.general_category import *
     15from if_hierarchy import *
    1516import optparse, sys
    1617
    17 
    18 # Generate a simplest possible test for a Unicode codepoint range
    19 # such that each 1 bit marks a position within a UTF-8 initial
    20 # subsequence such that each legal continuation of that subsequence
    21 # is within the range.  Return the generated variable.
    22 #
    23 # The test may be made up of up to three parts:
    24 # (a) a multibyte low-boundary test,
    25 # (b) a multibyte high-boundary test, and
    26 # (c) a range test.
    27 # It is possible that the low- and high- boundary tests have
    28 # a common multibyte prefix.
    29 def utf8_iftest_compiler(cgo, lo, hi):
    30   lo_byte = utf8_byte(lo, 1)
    31   hi_byte = utf8_byte(hi, 1)
    32   targetVar = "cp_range_%x_%x" % (lo, hi)
    33   return utf8_iftest_helper(cgo, lo, hi, 1, targetVar, TrueLiteral())
    34 
def utf8_iftest_helper(cgo, lo, hi, byte_no, targetVar, marker):
  """Recursively emit the byte-level test for codepoints [lo, hi], working
  on UTF-8 byte position byte_no; marker is the expression matching the
  prefix bytes already tested.  Returns the generated variable's name."""
  lo_byte = utf8_byte(lo, byte_no)
  hi_byte = utf8_byte(hi, byte_no)
  # A boundary at this byte position means no codepoint just outside the
  # range shares this byte value, so a plain byte test cannot over-match.
  at_lo_boundary = lo == 0 or utf8_byte(lo-1, byte_no) != lo_byte
  at_hi_boundary = hi == 0x10FFFF or utf8_byte(hi+1, byte_no) != hi_byte
  if at_lo_boundary and at_hi_boundary:
    # Case (c): a single byte-range test suffices at this position.
    if lo_byte == hi_byte:
      byteVar = "byte_%x" % lo_byte
    else:
      # Widen to full initial-byte extremes at the absolute ends of the
      # non-ASCII space so the byte-range test stays a single interval.
      if lo == 0x80: lo_byte = 0xC0
      if hi == 0x10FFFF: hi_byte = 0xFF
      byteVar = "byte_range_%x_%x" % (lo_byte, hi_byte)
    cgo.chardef_canonical(CanonicalCharSetDef(byteVar, [(lo_byte, hi_byte)]))
    return cgo.expr_string_to_variable(cgo.expr2py(make_and(marker, Var(byteVar))))
  elif lo_byte == hi_byte:
    # Common prefix byte: extend the marker one position and recurse deeper.
    byteVar = "byte_%x" % lo_byte
    cgo.chardef_canonical(CanonicalCharSetDef(byteVar, [(lo_byte, hi_byte)]))
    new_marker = make_shift_forward(make_and(marker, Var(byteVar)), 1)
    return utf8_iftest_helper(cgo, lo, hi, byte_no+1, targetVar, new_marker)
  elif not at_hi_boundary:
    # Split off the trailing sub-range sharing hi's bytes; OR the two tests.
    hi1 = min_codepoint_with_common_bytes(hi, byte_no)
    e1 = utf8_iftest_helper(cgo, lo, hi1-1, byte_no, targetVar, marker)
    e2 = utf8_iftest_helper(cgo, hi1, hi, byte_no, targetVar, marker)
    return cgo.expr_string_to_variable(cgo.expr2py(make_or(Var(e1), Var(e2))))
  else: # if at_hi_boundary:
    # Split off the leading sub-range sharing lo's bytes; OR the two tests.
    lo1 = max_codepoint_with_common_bytes(lo, byte_no)
    e1 = utf8_iftest_helper(cgo, lo, lo1, byte_no, targetVar, marker)
    e2 = utf8_iftest_helper(cgo, lo1+1, hi, byte_no, targetVar, marker)
    return cgo.expr_string_to_variable(cgo.expr2py(make_or(Var(e1), Var(e2))))
    64    
    65 def min_codepoint_with_common_bytes(cp, byte_no):
    66   u8len = utf8_length(cp)
    67   mask = (1 << (u8len-byte_no) * 6) - 1
    68   lo_cp = cp &~ mask
    69   if lo_cp == 0: return mask + 1
    70   else: return lo_cp
    71 
    72 def max_codepoint_with_common_bytes(cp, byte_no):
    73   u8len = utf8_length(cp)
    74   mask = (1 << (u8len-byte_no) * 6) - 1
    75   return cp | mask
    76 
    77 
    78 def generateCharClassDefsInIfHierarchy(cgo, enclosingRange, ifRangeList, charClassMap):
    79 #   inner_code = []
    80    (outer_lo, outer_hi) = enclosingRange
    81    enclosedRanges = rangeIntersect(ifRangeList, outer_lo, outer_hi)
    82    missingRanges = rangeGaps(enclosedRanges, outer_lo, outer_hi)
    83    for rg in missingRanges:
    84      (rglo, rghi) = rg
    85      generateCharClassSubDefs(cgo, rglo, rghi, charClassMap)
    86    topRanges = outerRanges(enclosedRanges)
    87    inner = innerRanges(enclosedRanges)
    88    for rg in topRanges:
    89      (rglo, rghi) = rg
    90      empty_range = True
    91      for k in charClassMap.keys():
    92         if rangeIntersect(charClassMap[k], rglo, rghi) != []:
    93            empty_range = False
    94            break
    95      if not empty_range:
    96        range_var = utf8_iftest_compiler(cgo, rglo, rghi)
    97        inner_cgo = CC_compiler(UTF8(), "r%x_%x" % (rglo, rghi) + '_tmp%i', False, '')
    98        inner_cgo.add_common_expressions(cgo)
    99        generateCharClassDefsInIfHierarchy(inner_cgo, rg, inner, charClassMap)
    100        if inner_cgo.generated_code != []:
    101          cgo.add_if_stmt(Var(range_var), inner_cgo.generated_code)
    102    return cgo.showcode()
    103 
    104 def generateCharClassSubDefs(cgo, lo, hi, charClassMap):
    105    for k in charClassMap.keys():
    106      if options.grep:
    107         targetVar = "all_chars"
    108      else:
    109         targetVar = "struct_%s.cc" % k
    110      subcc1 = rangeIntersect(charClassMap[k], lo, hi)
    111      # Divide by UTF-8 length, separating out E0, ED, F0 and F4 ranges
    112      for byte_range in [(0, 0x7F), (0x80, 0x7FF), (0x800, 0xFFF), (0x1000, 0xD7FF), (0xE000, 0xFFFF), (0x10000, 0x3FFFF), (0x40000, 0xFFFFF), (0x100000, 0x10FFFF)]:
    113         (lo1, hi1) = byte_range
    114         subcc2 = rangeIntersect(subcc1, lo1, hi1)
    115         utf8_sequence_generator(subcc2, 1, targetVar, cgo)
    116 
    117 def rangeIntersect(ccList, lo, hi):
    118     return [(max(lo, p[0]), min(hi, p[1])) for p in ccList if p[0] <= hi and p[1] >= lo]
    119 
    120 def rangeGaps(ccList, lo, hi):
    121     if lo >= hi: return []
    122     if ccList == []: return [(lo, hi)]
    123     (lo1, hi1) = ccList[0]
    124     if hi1 < lo: return rangeGaps(ccList[1:], lo, hi)
    125     if lo1 > lo: return [(lo, lo1 - 1)] + rangeGaps(ccList[1:], hi1+1, hi)
    126     elif hi1 < hi: return rangeGaps(ccList[1:], hi1+1, hi)
    127     else: return []
    128 
    129 def outerRanges(ccList):
    130     if len(ccList) <= 1: return ccList
    131     (lo1, hi1) = ccList[0]
    132     (lo2, hi2) = ccList[1]
    133     if hi2 <= hi1: return outerRanges([(lo1, hi1)] + ccList[2:])
    134     else: return [(lo1, hi1)] + outerRanges(ccList[1:])
    135 
    136 def innerRanges(ccList):
    137     if len(ccList) <= 1: return []
    138     (lo1, hi1) = ccList[0]
    139     (lo2, hi2) = ccList[1]
    140     if hi2 <= hi1: return [(lo2, hi2)] + innerRanges([(lo1, hi1)] + ccList[2:])
    141     else: return innerRanges(ccList[1:])
    142 
    143 
    144 
    145 def generateCharClassDefs(ifRangeList, charClassMap):
    146    cgo = CC_compiler(UTF8(), 'tmp%i', False, '')
    147    for k in charClassMap.keys():
    148      if options.grep:
    149          cgo.add_assignment("all_chars", '0')
    150      else:
    151          cgo.add_assignment("struct_%s.cc" % k, '0')
    152    generateCharClassDefsInIfHierarchy(cgo, (0, 0x10FFFF), ifRangeList, charClassMap)
    153    return cgo.showcode()
    154  
    155 
    156 #defaultIfRangeList = [(0,0x7FF), (0, 0x7F), (0x80, 0x3FF), (0x400,0x7FF), (0x800, 0xFFFF), (0x10000, 0x10FFFF)]
    157 
    158 #defaultIfRangeList = [(0x80,0x10FFFF), (0x80,0x7FF), (0x800,0xFFFF), (0x10000, 0x10FFFF)]
    159 
    160 
# Default if-hierarchy guiding code generation: each (lo, hi) pair becomes
# a candidate conditional region, and pairs nested inside earlier pairs
# produce nested if-statements (see outerRanges/innerRanges).  The comments
# name the Unicode blocks the candidate ranges were chosen from.
defaultIfRangeList = [
#Non-ASCII
(0x80,0x10FFFF),
#Two-byte sequences
(0x80,0x7FF),
(0x100, 0x3FF),
#0100..017F; Latin Extended-A
#0180..024F; Latin Extended-B
#0250..02AF; IPA Extensions
#02B0..02FF; Spacing Modifier Letters
(0x100, 0x2FF), (0x100, 0x24F), (0x100, 0x17F), (0x180, 0x24F), (0x250, 0x2AF), (0x2B0, 0x2FF),
#0300..036F; Combining Diacritical Marks
#0370..03FF; Greek and Coptic
(0x300, 0x36F), (0x370, 0x3FF),
#0400..04FF; Cyrillic
#0500..052F; Cyrillic Supplement
#0530..058F; Armenian
#0590..05FF; Hebrew
#0600..06FF; Arabic
(0x400, 0x5FF), (0x400, 0x4FF), (0x500, 0x058F), (0x500, 0x52F), (0x530, 0x58F), (0x590, 0x5FF), (0x600, 0x6FF),
#0700..074F; Syriac
#0750..077F; Arabic Supplement
#0780..07BF; Thaana
#07C0..07FF; NKo
(0x700, 0x77F), (0x700, 0x74F), (0x750, 0x77F), (0x780, 0x7FF), (0x780, 0x7BF), (0x7C0, 0x7FF),
#Three-byte sequences
(0x800, 0xFFFF),
(0x800, 0x4DFF),
(0x800, 0x1FFF),
(0x800, 0x0FFF),
#0800..083F; Samaritan
#0840..085F; Mandaic
#08A0..08FF; Arabic Extended-A
#0900..097F; Devanagari
#0980..09FF; Bengali
#0A00..0A7F; Gurmukhi
#0A80..0AFF; Gujarati
#0B00..0B7F; Oriya
#0B80..0BFF; Tamil
#0C00..0C7F; Telugu
#0C80..0CFF; Kannada
#0D00..0D7F; Malayalam
#0D80..0DFF; Sinhala
#0E00..0E7F; Thai
#0E80..0EFF; Lao
#0F00..0FFF; Tibetan
(0x1000, 0x1FFF),
#1000..109F; Myanmar
#10A0..10FF; Georgian
#1100..11FF; Hangul Jamo
#1200..137F; Ethiopic
#1380..139F; Ethiopic Supplement
#13A0..13FF; Cherokee
#1400..167F; Unified Canadian Aboriginal Syllabics
#1680..169F; Ogham
#16A0..16FF; Runic
#1700..171F; Tagalog
#1720..173F; Hanunoo
#1740..175F; Buhid
#1760..177F; Tagbanwa
#1780..17FF; Khmer
#1800..18AF; Mongolian
#18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
#1900..194F; Limbu
#1950..197F; Tai Le
#1980..19DF; New Tai Lue
#19E0..19FF; Khmer Symbols
#1A00..1A1F; Buginese
#1A20..1AAF; Tai Tham
#1AB0..1AFF; Combining Diacritical Marks Extended
#1B00..1B7F; Balinese
#1B80..1BBF; Sundanese
#1BC0..1BFF; Batak
#1C00..1C4F; Lepcha
#1C50..1C7F; Ol Chiki
#1CC0..1CCF; Sundanese Supplement
#1CD0..1CFF; Vedic Extensions
#1D00..1D7F; Phonetic Extensions
#1D80..1DBF; Phonetic Extensions Supplement
#1DC0..1DFF; Combining Diacritical Marks Supplement
#1E00..1EFF; Latin Extended Additional
#1F00..1FFF; Greek Extended
(0x2000, 0x4DFF),(0x2000, 0x2FFF),
(0x3000, 0x4DFF),
(0x4E00,0x9FFF),
#4E00..9FFF; CJK Unified Ideographs
(0xA000,0xFFFF),
#Four-byte sequences
(0x10000, 0x10FFFF)]
    25018
    25119
     
# Template for a generated category function's header; the category name is
# substituted twice (function-name suffix and struct parameter name).
Unicode_CC_header = "def %s(basis_bits, struct_%s):\n"
# Minimal Main stub appended so the generated module has an entry point.
Unicode_dummy_main = "\n\ndef Main(basis_bits):\n    pass\n"
     23
     24
# Scaffolding emitted in grep mode: basis-bit and output record classes plus
# a ParseLines pass computing the LF stream from the eight basis bits
# (presumably bit_0 is the most significant bit, making the matched byte
# 0x0A — TODO confirm against the basis-bit transposition convention).
grep_struct = r"""
class Basis_bits():
  bit_0 = 0
  bit_1 = 0
  bit_2 = 0
  bit_3 = 0
  bit_4 = 0
  bit_5 = 0
  bit_6 = 0
  bit_7 = 0 

class Lex():
  LF = (0)

class Output():
        matches = 0

def ParseLines(basis_bits, lex):
  temp1 = (basis_bits.bit_0 | basis_bits.bit_1)
  temp2 = (basis_bits.bit_2 | basis_bits.bit_3)
  temp3 = (temp1 | temp2)
  temp4 = (basis_bits.bit_4 &~ basis_bits.bit_5)
  temp5 = (basis_bits.bit_6 &~ basis_bits.bit_7)
  temp6 = (temp4 & temp5)
  lex.LF = (temp6 &~ temp3)

"""
     52
     53
     54
     55
     56
    25557
    25658def generateDefs1(general_category):
     
    26062  header = "def %s(basis_bits, struct_%s):\n" % (general_category, general_category)
    26163  if options.grep:
    262         struct = r"""
    263 class Basis_bits():
    264         bit_0 = 0
    265         bit_1 = 0
    266         bit_2 = 0
    267         bit_3 = 0
    268         bit_4 = 0
    269         bit_5 = 0
    270         bit_6 = 0
    271         bit_7 = 0 
    272  
    273 class Lex():
    274         LF = (0)
    275  
    276 class Output():
    277         matches = 0
    278 
    279 def ParseLines(basis_bits, lex):
    280         temp1 = (basis_bits.bit_0 | basis_bits.bit_1)
    281         temp2 = (basis_bits.bit_2 | basis_bits.bit_3)
    282         temp3 = (temp1 | temp2)
    283         temp4 = (basis_bits.bit_4 &~ basis_bits.bit_5)
    284         temp5 = (basis_bits.bit_6 &~ basis_bits.bit_7)
    285         temp6 = (temp4 & temp5)
    286         lex.LF = (temp6 &~ temp3)
    287 
    288 """
     64        struct = grep_struct
    28965        header = "def Demo(basis_bits, lex, output):\n"
     66        template_var = "all_%s"
    29067  else:
    29168        struct = Unicode_CC_struct % (general_category)
    29269        header = "def %s(basis_bits, struct_%s):\n" % (general_category, general_category)
     70        template_var = "struct_%s.cc"
    29371  if options.flat:
    294       code = generateCharClassDefs([], catmap)
     72      code = generateCharClassDefs([], catmap, template_var)
    29573  elif options.simple:
    296       code = generateCharClassDefs([(0x80, 0x7FF), (0x800,0xFFFF), (0x10000, 0x10FFF)], catmap)
     74      code = generateCharClassDefs([(0x80, 0x7FF), (0x800,0xFFFF), (0x10000, 0x10FFF)], catmap, template_var)
    29775  else:
    298       code = generateCharClassDefs(defaultIfRangeList, catmap)
     76      code = generateCharClassDefs(defaultIfRangeList, catmap,  template_var)
    29977  if options.grep:
    30078      code += r"""
     
    31492  return main
    31593
#
# Partition a list of codepoint ranges into a minimal list of
# UTF-8 prefix groups, where a group is either
# (a) a range of codepoints whose UTF-8 encodings all have the same length,
#     such that every codepoint in the range is within the group, or
# (b) a sublist of ranges all having the same UTF-8 byte at position byte_no.
def partition_by_UTF8_group(range_list, byte_no):
    if range_list == []: return []
    (lo, hi) = range_list[0]
    u8len_lo = utf8_length(lo)
    u8len_hi = utf8_length(hi)
    # Split a range straddling a UTF-8 length boundary at that boundary.
    if u8len_lo != u8len_hi:
        mid = max_codepoint_of_length(u8len_lo)
        return partition_by_UTF8_group([(lo, mid), (mid+1, hi)] + range_list[1:], byte_no)
    lobyte1 = utf8_byte(lo, byte_no)
    hibyte1 = utf8_byte(hi, byte_no)
    if lobyte1 != hibyte1:
        # Differing bytes at this position: carve off ragged edges so the
        # middle becomes a full prefix group of type (a).
        if not is_low_codepoint_after_byte(lo, byte_no):
            lo1 = lo | ((1 << (6 * (u8len_lo - byte_no))) - 1)
            #print "lo--lo1:  %x--%x" % (lo, lo1)
            return [[(lo, lo1)]] + partition_by_UTF8_group([(lo1+1, hi)] + range_list[1:], byte_no)
        elif not is_high_codepoint_after_byte(hi, byte_no):
            hi1 = hi &~ ((1 << (6 * (u8len_lo - byte_no))) - 1)
            #print "lo--hi-1:  %x--%x" % (lo, hi1-1)
            return [[(lo, hi1-1)]] + partition_by_UTF8_group([(hi1, hi)] + range_list[1:], byte_no)
        else:
            # we have a prefix group of type (a)
            return [[(lo, hi)]] + partition_by_UTF8_group(range_list[1:], byte_no)
    # Same byte at this position: merge with the following group when it
    # shares the same byte, otherwise start a new group of type (b).
    group1 = [(lo, hi)]
    subpartitions = partition_by_UTF8_group(range_list[1:], byte_no)
    if subpartitions == []: return [group1]
    elif utf8_byte(subpartitions[0][0][0], byte_no) == lobyte1:
        return [group1 + subpartitions[0]] + subpartitions[1:]
    else:
        return [group1] + subpartitions
    352 
    353 # Ensure the sequence of preceding bytes is defined, up to, but
    354 # not including the given byte_no
    355 def ensure_preceding_prefix_defined(codepoint, byte_no, cgo):
    356    for i in range(1, byte_no):
    357       byte_i = utf8_byte(codepoint, i)
    358       byteVar = "byte_%x" % byte_i
    359       cgo.chardef_canonical(CanonicalCharSetDef(byteVar, [(byte_i, byte_i)]))
    360       if i > 1:
    361          pfx1 = utf8_prefix_var(codepoint, i-1)
    362          pfx1_adv = pfx1 + "_adv"
    363          cgo.add_canonical_assignment(pfx1_adv, cgo.expr2py(make_shift_forward(Var(pfx1), 1)))
    364          pfx2 = utf8_prefix_var(codepoint, i)
    365          cgo.add_canonical_assignment(pfx2, cgo.expr2py(make_and(Var(pfx1_adv), Var(byteVar))))
    366 
    367 
#
# Generate remaining code to match UTF-8 code sequences within
# the codepoint set u8_partition, assuming that the code matching the
# sequences up to byte number byte_no have been generated.
#
def utf8_sequence_generator(u8_partition, byte_no, targetVar, cgo):
   if len(u8_partition) == 0: return
   (lo, hi) = u8_partition[0]
   if utf8_length(lo) == byte_no:
      # We have a single byte remaining to match for all codepoints
      # in this partition.  Use the byte class compiler to generate
      # matches for these codepoints.
      ensure_preceding_prefix_defined(lo, byte_no, cgo)
      byte_pair_list = byte_definitions(u8_partition, byte_no)
      #print byte_pair_list
      if len(byte_pair_list) == 1:
          (lobyte, hibyte) = byte_pair_list[0]
          if lo == hi:
              final_byte_var = "byte_%x" % lobyte
          else:
              final_byte_var = "byte_range_%x_%x" % (lobyte, hibyte)
          cgo.chardef_canonical(CanonicalCharSetDef(final_byte_var, byte_pair_list))
      else:
          # NOTE(review): this takes the *low* end of the last range;
          # u8_partition[-1][1] would be the partition's true upper bound —
          # confirm whether the name is intentionally keyed this way.
          hi = u8_partition[-1][0]
          final_byte_var = "%s_range_%x_%x_%i" % (targetVar[-2:], lo, hi, byte_no)
          cgo.chardef2py(CanonicalCharSetDef(final_byte_var, byte_pair_list))
      test_expr = Var(final_byte_var)
      # For multibyte codepoints, AND with the advanced prefix stream.
      if byte_no > 1: 
         pfx1 = utf8_prefix_var(lo, byte_no-1)
         pfx1_adv = pfx1 + "_adv"
         cgo.add_canonical_assignment(pfx1_adv, cgo.expr2py(make_shift_forward(Var(pfx1), 1)))
         test_expr = make_and(Var(pfx1_adv), test_expr)
      cgo.add_assignment(targetVar, cgo.expr2py(make_or(Var(targetVar), test_expr)))
   else:
     # More than one byte remains: split into prefix groups and recurse.
     partitions = partition_by_UTF8_group(u8_partition, byte_no)
     for p in partitions:
       (lo, hi) = p[0]
       lbyte = utf8_byte(lo, byte_no)
       hbyte = utf8_byte(hi, byte_no)
       ensure_preceding_prefix_defined(lo, byte_no, cgo)
       if lbyte == hbyte:
         # Type (b) group: a single shared byte at this position.
         byteVar = "byte_%x" % lbyte
         cgo.chardef_canonical(CanonicalCharSetDef(byteVar, [(lbyte, lbyte)]))
         if byte_no > 1:
           last_prefix = utf8_prefix_var(lo, byte_no - 1)
           this_prefix = utf8_prefix_var(lo, byte_no)
           cgo.add_canonical_assignment(this_prefix, cgo.expr2py(make_and(make_shift_forward(Var(last_prefix), 1), Var(byteVar))))
         if byte_no < utf8_length(lo): utf8_sequence_generator(p, byte_no+1, targetVar, cgo)
       else:
         # Type (a) group: a byte range here, then any continuation bytes
         # (0x80-0xBF) for the remaining positions.
         byteVar = "byte_range_%x_%x" % (lbyte, hbyte)
         cgo.chardef_canonical(CanonicalCharSetDef(byteVar, [(lbyte, hbyte)]))
         if byte_no > 1:
           last_prefix = utf8_prefix_var(lo, byte_no - 1)
           this_prefix = last_prefix + "_" + byteVar
           cgo.add_canonical_assignment(this_prefix, cgo.expr2py(make_and(make_shift_forward(Var(last_prefix), 1), Var(byteVar))))
         else: this_prefix = byteVar
         suffixVar = "byte_range_%x_%x" % (0x80, 0xBF)
         cgo.chardef_canonical(CanonicalCharSetDef(suffixVar, [(0x80, 0xBF)]))
         last_prefix = this_prefix
         while byte_no < utf8_length(lo):
           byte_no += 1
           this_prefix = last_prefix + "_sfx"
           cgo.add_assignment(this_prefix, cgo.expr2py(make_and(make_shift_forward(Var(last_prefix), 1), Var(suffixVar))))
           last_prefix = this_prefix
         cgo.add_assignment(targetVar, cgo.expr2py(make_or(Var(targetVar), Var(last_prefix))))
    433 
    434 
    435 
    436 def utf8_prefix_var(codepoint, prefix_bytes):
    437    if prefix_bytes == 0:
    438       raise Exception ("utf8_prefix_var(%x, %i)" % (codepoint, prefix_bytes))
    439    elif prefix_bytes == 1:
    440       return "byte_%x" % utf8_byte(codepoint, 1)
    441    else:
    442       return "_".join(["sequence"] + ["%x" % utf8_byte(codepoint, n+1) for n in range(prefix_bytes)])
    443 
    444 
    445 def byte_definitions(range_list, n):
    446    #print ["%x--%x" % (p[0], p[1]) for p in range_list]
    447    result = [(utf8_byte(rg[0], n), utf8_byte(rg[1], n)) for rg in range_list]
    448    #print ["%x--%x" % (p[0], p[1]) for p in result]
    449    return result
    45094
    45195def main():   
Note: See TracChangeset for help on using the changeset viewer.