Changeset 3962 for proto/charsetcompiler


Ignore:
Timestamp:
Aug 5, 2014, 3:20:17 PM (5 years ago)
Author:
cameron
Message:

Unicode_category_compiler fixes; category header template; makefile

Location:
proto/charsetcompiler
Files:
3 added
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/unicode_category_compiler.py

    r3960 r3962  
    181181
    182182
     183
    183184def generateCharClassDefsInIfHierarchy(cgo, enclosingRange, ifRangeList, charClassMap):
    184185#   inner_code = []
    185186   (outer_lo, outer_hi) = enclosingRange
    186    while ifRangeList!=[]:
    187      (lo, hi) = ifRangeList[0]
    188      if lo >= outer_hi: break
    189      if hi > outer_hi: raise Exception("Bad range nested (%i, %i) within (%i, %i)\n" % (lo, hi, outer_lo, outer_hi))
    190      # We have more subranges of the enclosing range
    191      if lo > outer_lo:
    192        # An innermost nest, not further embedded.
    193        generateCharClassSubDefs(cgo, outer_lo, lo-1, charClassMap)
    194      ifRangeList = ifRangeList[1:]
    195      range_var = "CC_%x_%x" % (lo, hi)
    196      utf8_range_compiler(cgo, lo, hi, range_var)
     187   enclosedRanges = rangeIntersect(ifRangeList, outer_lo, outer_hi)
     188   missingRanges = rangeGaps(enclosedRanges, outer_lo, outer_hi)
     189   for rg in missingRanges:
     190     (rglo, rghi) = rg
     191     generateCharClassSubDefs(cgo, rglo, rghi, charClassMap)
     192   topRanges = outerRanges(enclosedRanges)
     193   inner = innerRanges(enclosedRanges)
     194   for rg in topRanges:
     195     (rglo, rghi) = rg
     196     range_var = "CC_%x_%x" % (rglo, rghi)
     197     utf8_range_compiler(cgo, rglo, rghi, range_var)
    197198     inner_cgo = CC_compiler(UTF8(), range_var + '_tmp%i', False, '')
    198199     inner_cgo.add_common_expressions(cgo)
    199      generateCharClassDefsInIfHierarchy(inner_cgo, (lo, hi), ifRangeList, charClassMap)
    200      cgo.add_if_stmt(Var(range_var), inner_cgo.generated_code)
    201      outer_lo = hi + 1
    202    # Final innermost_nest
    203    if outer_lo <= outer_hi:
    204      generateCharClassSubDefs(cgo, outer_lo, outer_hi, charClassMap)
     200     generateCharClassDefsInIfHierarchy(inner_cgo, rg, inner, charClassMap)
     201     if inner_cgo.generated_code != []:
     202        cgo.add_if_stmt(Var(range_var), inner_cgo.generated_code)
    205203   return cgo.showcode()
    206204
     
    222220    return [(max(lo, p[0]), min(hi, p[1])) for p in ccList if p[0] <= hi and p[1] >= lo]
    223221
     222def rangeGaps(ccList, lo, hi):
     223    if lo >= hi: return []
     224    if ccList == []: return [(lo, hi)]
     225    (lo1, hi1) = ccList[0]
     226    if hi1 < lo: return rangeGaps(ccList[1:], lo, hi)
     227    if lo1 > lo: return [(lo, lo1 - 1)] + rangeGaps(ccList[1:], hi1+1, hi)
     228    elif hi1 < hi: return rangeGaps(ccList[1:], hi1+1, hi)
     229    else: return []
     230
     231def outerRanges(ccList):
     232    if len(ccList) <= 1: return ccList
     233    (lo1, hi1) = ccList[0]
     234    (lo2, hi2) = ccList[1]
     235    if hi2 <= hi1: return outerRanges([(lo1, hi1)] + ccList[2:])
     236    else: return [(lo1, hi1)] + outerRanges(ccList[1:])
     237
     238def innerRanges(ccList):
     239    if len(ccList) <= 1: return []
     240    (lo1, hi1) = ccList[0]
     241    (lo2, hi2) = ccList[1]
     242    if hi2 <= hi1: return [(lo2, hi2)] + innerRanges([(lo1, hi1)] + ccList[2:])
     243    else: return innerRanges(ccList[1:])
     244
     245
     246
    224247def generateCharClassDefs(ifRangeList, charClassMap):
    225248   cgo = CC_compiler(UTF8(), 'tmp%i', False, '')
    226249   for k in charClassMap.keys():
    227250     cgo.add_assignment("struct_%s.cc" % k, '0')
    228    return generateCharClassDefsInIfHierarchy(cgo, (0, 0x10FFFF), ifRangeList, charClassMap)
    229 
     251   generateCharClassDefsInIfHierarchy(cgo, (0, 0x10FFFF), ifRangeList, charClassMap)
     252   return cgo.showcode()
     253 
    230254
    231255defaultIfRangeList = [(0,0x7FF), (0, 0x7F), (0x80, 0x3FF), (0x400,0x7FF), (0x800, 0xFFFF), (0x10000, 0x10FFFF)]
     
    234258
    235259
    236 Unicode_CC_struct = "class struct_%s:\n\tcc = 0\n\n"
     260Unicode_CC_struct = "class category_%s:\n\tcc = 0\n\n"
    237261Unicode_CC_header = "def %s(basis_bits, struct_%s):\n"
    238 
     262Unicode_dummy_main = "\n\ndef Main(basis_bits):\n    pass\n"
    239263def generateDefs1(general_category):
    240264  catmap = {}
     
    243267  header = "def %s(basis_bits, struct_%s):\n" % (general_category, general_category)
    244268  code = generateCharClassDefs(defaultIfRangeList, catmap)
    245   return struct + header + "".join(code)
     269  return struct + header + "".join(code) + Unicode_dummy_main
    246270
    247271
Note: See TracChangeset for help on using the changeset viewer.