Changeset 4150 for proto


Ignore:
Timestamp:
Sep 11, 2014, 1:14:42 PM (4 years ago)
Author:
cameron
Message:

Property names and aliases

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_properties.py

    r4148 r4150  
    1010#
    1111#
    12 import re
     12import re, string
    1313from unicode_set import *
    1414
    # Directory that holds the UCD data files; parse_PropertyAlias_txt() opens
    # UCD_dir + "/" + 'PropertyAliases.txt'.
    # NOTE(review): presumably the Unicode version of the data set -- confirm.
    1515UCD_dir = "7.0.0"
     16
     17
    1618#
    1719#  Processing files of the UCD
    1820#
     21#  General format for skippable comments, blank lines
     22UCD_skip = re.compile("^#.*$|^\s*$")
    1923
     24#
     25#  UCD Property File Format 1: property aliases
     26#  PropertyAliases.txt
     27#
     28UCD_property_alias_regexp = re.compile("^([-A-Za-z_0-9]+)\s*;\s*([-A-Za-z_0-9]+)([^#]*)")
     29
     30def parse_PropertyAlias_txt():
     31   property_enum_name_list = []
     32   full_name_map = {}
     33   property_lookup_map = {}
     34   f = open(UCD_dir + "/" + 'PropertyAliases.txt')
     35   lines = f.readlines()
     36   for t in lines:
     37      if UCD_skip.match(t): continue  # skip comment and blank lines
     38      m = UCD_property_alias_regexp.match(t)
     39      if not m: raise Exception("Unknown property alias syntax: %s" % t)
     40      prop_enum = m.group(1)
     41      prop_preferred_full_name = m.group(2)
     42      prop_extra = m.group(3)
     43      prop_aliases = re.findall("[-A-Za-z_0-9]+", prop_extra)
     44      property_enum_name_list.append(prop_enum)
     45      full_name_map[prop_enum] = prop_preferred_full_name
     46      property_lookup_map[canonicalize(prop_enum)] = prop_enum
     47      property_lookup_map[canonicalize(prop_preferred_full_name)] = prop_enum
     48      for a in prop_aliases: property_lookup_map[canonicalize(a)] = prop_enum
     49   return (property_enum_name_list, full_name_map, property_lookup_map)
     50
     51trivial_name_char_re = re.compile('[-_\s]')
     52def canonicalize(property_string):
     53   c = trivial_name_char_re.sub('', property_string.lower())
     54   if len(c) > 2 and c[0:2] == "is": return c[2:]
     55   else: return c
     56
     57
     58PropertyAliases_template = r"""
     59namespace UCD {
     60  enum class property_t {
     61    %s
     62  };
     63  std::string[] property_full_name;
     64%s
     65
     66  std::map<std::string, property> alias_map;
     67%s
     68
     69}
     70"""
     71
     72enums_per_line = 8
     73def generate_PropertyAliases_h():
     74   (property_enum_name_list, full_name_map, property_lookup_map) = parse_PropertyAlias_txt()
     75   f = open('PropertyAliases.h', 'w')
     76   enum_text = property_enum_name_list[0]
     77   for i in range(1, len(property_enum_name_list)):
     78     if i % enums_per_line == 0: enum_text += ",\n    "
     79     else: enum_text += ", "
     80     enum_text += property_enum_name_list[i]
     81   full_name_text = ""
     82   for e in property_enum_name_list:
     83     full_name_text += '  property_full_name[%s] = "%s";\n' % (e, full_name_map[e])
     84   map_text = ""
     85   for k in sorted(property_lookup_map.keys()):
     86     map_text += '  alias_map.insert(make_pair("%s", %s));\n' % (k, property_lookup_map[k])
     87   f.write(PropertyAliases_template % (enum_text, full_name_text, map_text))
     88   f.close()
     89   
     90#
     91#  UCD Property File Format 2:  codepoint -> name maps
     92#
    2093UCD_skip = re.compile("^#.*$|^\s*$")
    2194UCD_point_name_regexp = re.compile("^([0-9A-F]{4,6})\s*;\s*((?:[-A-Za-z0-9_]+\s+)*[-A-Za-z0-9_]+)\s*(?:[;#]|$)")
Note: See TracChangeset for help on using the changeset viewer.