Ignore:
Timestamp:
Jul 2, 2015, 4:52:54 PM (4 years ago)
Author:
nmedfort
Message:

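Modifications to the UCD property object generator. In the hunks shown below for UCD_parser.py the changes are formatting-only: indentation is normalized to four spaces, trailing whitespace is removed, a comment gains a space after `#`, and one single-line `if ...: raise` is split across two lines.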

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_parser.py

    r4462 r4632  
    1818trivial_name_char_re = re.compile('[-_\s]')
    1919def canonicalize(property_string):
    20    return trivial_name_char_re.sub('', property_string.lower())
     20    return trivial_name_char_re.sub('', property_string.lower())
    2121
    2222#
     
    8686    lines = f.readlines()
    8787    for t in lines:
    88         if UCD_skip.match(t): 
     88        if UCD_skip.match(t):
    8989            m = UCD_property_value_missing_regexp.match(t)
    9090            if m:
     
    146146#
    147147def union_of_all(uset_list):
    148    if uset_list == []: return empty_uset()
    149    else:
    150      accum_set = uset_list[0]
    151      for s in uset_list[1:]:
    152         accum_set = uset_union(accum_set, s)
    153      return accum_set
     148    if uset_list == []: return empty_uset()
     149    else:
     150        accum_set = uset_list[0]
     151        for s in uset_list[1:]:
     152            accum_set = uset_union(accum_set, s)
     153        return accum_set
    154154
    155155#
     
    170170            m = UCD_missing_regexp1.match(t)
    171171            if m:
    172               if default_value != None:
    173                 raise Exception("Default value already specified, extraneous @missing spec: %s" % t)
    174               (missing_lo, missing_hi, default_value) = (int(m.group(1), 16), int(m.group(2), 16), m.group(3))
    175               default_value = canonicalize(default_value)
    176               if not canon_map.has_key(default_value):  raise Exception("Unknown default property value name '%s'" % default_value)
    177               if missing_lo != 0 or missing_hi != 0x10FFFF: raise Exception("Unexpected missing data range '%x, %x'" % (missing_lo, missing_hi))
    178               default_value = canon_map[default_value]
     172                if default_value != None:
     173                    raise Exception("Default value already specified, extraneous @missing spec: %s" % t)
     174                (missing_lo, missing_hi, default_value) = (int(m.group(1), 16), int(m.group(2), 16), m.group(3))
     175                default_value = canonicalize(default_value)
     176                if not canon_map.has_key(default_value):  raise Exception("Unknown default property value name '%s'" % default_value)
     177                if missing_lo != 0 or missing_hi != 0x10FFFF: raise Exception("Unexpected missing data range '%x, %x'" % (missing_lo, missing_hi))
     178                default_value = canon_map[default_value]
    179179            continue  # skip comment and blank lines
    180180        m = UCD_point_name_regexp.match(t)
     
    182182            (codepoint, name) = (int(m.group(1), 16), m.group(2))
    183183            newset = singleton_uset(codepoint)
    184         else: 
     184        else:
    185185            m = UCD_range_name_regexp.match(t)
    186186            if not m: raise Exception("Unknown syntax: %s" % t)
     
    206206        name_list_order = ['LC', 'L', 'M', 'N', 'P', 'S', 'Z', 'C']+ name_list_order
    207207    for v in vlist:
    208         if not v in name_list_order: 
     208        if not v in name_list_order:
    209209            #raise Exception("Property %s value %s missing" % (self.full_name_map[property_code], v))
    210210            #print("Warning: property %s has no instance of value %s" % (property_code, v))
     
    217217        if value_map.has_key(default_value):
    218218            value_map[default_value] = uset_union(value_map[default_value], need_default_value)
    219         else: 
     219        else:
    220220            value_map[default_value] = need_default_value
    221221            name_list_order.append(default_value)
     
    234234        scx_items = scx_list.split(" ")
    235235        for scx in scx_items:
    236             #sc = canonical_property_value_map[canonicalize(scx)]
     236            # sc = canonical_property_value_map[canonicalize(scx)]
    237237            sc = scx
    238             if value_map.has_key(sc): 
    239                value_map[sc] = uset_union(value_map[sc], scx_set_map[scx_list])
     238            if value_map.has_key(sc):
     239                value_map[sc] = uset_union(value_map[sc], scx_set_map[scx_list])
    240240            else: value_map[sc] = scx_set_map[scx_list]
    241241        explicitly_defined_set = uset_union(explicitly_defined_set, scx_set_map[scx_list])
    242     for v in scripts: 
     242    for v in scripts:
    243243        if value_map.has_key(v):
    244244            value_map[v] = uset_union(value_map[v], uset_difference(script_map[v], explicitly_defined_set))
     
    250250
    251251def parse_UCD_codepoint_name_map(mapfile, canon_map = None):
    252    value_map = {}
    253    name_list_order = []
    254    f = open(UCD_dir + "/" + mapfile)
    255    lines = f.readlines()
    256    for t in lines:
    257       if UCD_skip.match(t):
    258         continue  # skip comment and blank lines
    259       m = UCD_point_name_regexp.match(t)
    260       if m:
    261         (codepoint, name) = (int(m.group(1), 16), m.group(2))
    262         newset = singleton_uset(codepoint)
    263       else:
    264         m = UCD_range_name_regexp.match(t)
    265         if not m: raise Exception("Unknown syntax: %s" % t)
    266         (cp_lo, cp_hi, name) = (int(m.group(1), 16), int(m.group(2), 16), m.group(3))
    267         newset = range_uset(cp_lo, cp_hi)
    268       if not canon_map == None:
    269         cname = canonicalize(name)
    270         if not canon_map.has_key(cname):  raise Exception("Unknown property or property value name '%s'" % cname)
    271         name = canon_map[cname]
    272       if not value_map.has_key(name):
    273         value_map[name] = newset
    274         name_list_order.append(name)
    275       else: value_map[name] = uset_union(value_map[name], newset)
    276    return (name_list_order, value_map)
    277 
    278 
     252    value_map = {}
     253    name_list_order = []
     254    f = open(UCD_dir + "/" + mapfile)
     255    lines = f.readlines()
     256    for t in lines:
     257        if UCD_skip.match(t):
     258            continue  # skip comment and blank lines
     259        m = UCD_point_name_regexp.match(t)
     260        if m:
     261            (codepoint, name) = (int(m.group(1), 16), m.group(2))
     262            newset = singleton_uset(codepoint)
     263        else:
     264            m = UCD_range_name_regexp.match(t)
     265            if not m: raise Exception("Unknown syntax: %s" % t)
     266            (cp_lo, cp_hi, name) = (int(m.group(1), 16), int(m.group(2), 16), m.group(3))
     267            newset = range_uset(cp_lo, cp_hi)
     268        if not canon_map == None:
     269            cname = canonicalize(name)
     270            if not canon_map.has_key(cname):
     271                raise Exception("Unknown property or property value name '%s'" % cname)
     272            name = canon_map[cname]
     273        if not value_map.has_key(name):
     274            value_map[name] = newset
     275            name_list_order.append(name)
     276        else: value_map[name] = uset_union(value_map[name], newset)
     277    return (name_list_order, value_map)
     278
     279
Note: See TracChangeset for help on using the changeset viewer.