Ignore:
Timestamp:
Sep 12, 2014, 6:23:25 AM (5 years ago)
Author:
cameron
Message:

Parsing PropertyValueAliases.txt

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_properties.py

    r4150 r4153  
    5858PropertyAliases_template = r"""
    5959namespace UCD {
    60   enum class property_t {
     60  enum property_t {
    6161    %s
    6262  };
     
    8989   
    9090#
    91 #  UCD Property File Format 2:  codepoint -> name maps
     91#  UCD Property File Format 2: property value aliases
     92#  PropertyValueAliases.txt
     93#
     94#  This file records value aliases for property values for
     95#  each enumerated property, with the following additional notes:
     96#  (1) The corresponding integer value of the enum constant is
     97#      also specified for ccc (second field).
     98#  (2) The Age property is a numeric type which has decimal float
     99#      values as the enum constants: these won't be legal in enum syntax.
     100#  (3) Binary properties also have enumerated values and aliases listed,
     101#      although this is redundant, because all binary properties have the
     102#      same value space.
     103#
     104
     105UCD_property_value_alias_regexp = re.compile("^([-A-Za-z_0-9.]+)\s*;\s*([-A-Za-z_0-9.]+)\s*;\s*([-A-Za-z_0-9.]+)([^#]*)")
     106
     107def parse_PropertyValueAlias_txt():
     108    property_value_list = {}
     109    property_value_enum_integer = {}
     110    property_value_full_name_map = {}
     111    property_value_lookup_map = {}
     112    f = open(UCD_dir + "/" + 'PropertyValueAliases.txt')
     113    lines = f.readlines()
     114    for t in lines:
     115        if UCD_skip.match(t): continue  # skip comment and blank lines
     116        m = UCD_property_value_alias_regexp.match(t)
     117        if not m: raise Exception("Unknown property value alias syntax: %s" % t)
     118        prop_code = m.group(1)
     119        if not property_value_list.has_key(prop_code):
     120          property_value_list[prop_code] = []
     121          property_value_enum_integer[prop_code] = {}
     122          property_value_full_name_map[prop_code] = {}
     123          property_value_lookup_map[prop_code] = {}
     124          enum_integer = 0
     125        # Special case for ccc: second field is enum integer value
     126        if prop_code == 'ccc':
     127          enum_integer = int(m.group(2))
     128          value_enum = m.group(3)
     129          extra = m.group(4)
     130          extra_list = re.findall("[-A-Za-z_0-9.]+", extra)
     131          value_preferred_full_name = extra_list[0]
     132          value_aliases = extra_list[1:]
     133        else:
     134          value_enum = m.group(2)
     135          value_preferred_full_name = m.group(3)
     136          extra = m.group(4)
     137          value_aliases = re.findall("[-A-Za-z_0-9]+", extra)
     138        property_value_list[prop_code].append(value_enum)
     139        property_value_enum_integer[prop_code][value_enum] = enum_integer
     140        enum_integer += 1
     141        property_value_full_name_map[prop_code][value_enum] = value_preferred_full_name
     142        property_value_lookup_map[prop_code][canonicalize(value_enum)] = value_enum
     143        property_value_lookup_map[prop_code][canonicalize(value_preferred_full_name)] = value_enum
     144        for a in value_aliases: property_value_lookup_map[prop_code][canonicalize(a)] = value_enum
     145    return (property_value_list, property_value_enum_integer, property_value_full_name_map, property_value_lookup_map)
     146
     147
     148#
     149#  UCD Property File Format 3:  codepoint -> name maps
    92150#
    93151UCD_skip = re.compile("^#.*$|^\s*$")
Note: See TracChangeset for help on using the changeset viewer.