Changeset 4186 for proto


Ignore:
Timestamp:
Sep 23, 2014, 12:47:34 PM (4 years ago)
Author:
cameron
Message:

Restructuring.

Location:
proto/charsetcompiler/UCD
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_properties.py

    r4185 r4186  
    2727UCD_property_section_regexp = re.compile("^#\s*([-A-Za-z_0-9]+)\s*Properties\s*$")
    2828UCD_property_alias_regexp = re.compile("^([-A-Za-z_0-9]+)\s*;\s*([-A-Za-z_0-9]+)([^#]*)")
    29 
    30 def parse_PropertyAlias_txt():
    31    property_enum_name_list = []
    32    full_name_map = {}
    33    property_lookup_map = {}
    34    property_kind_map = {}
    35    property_kind = "unspecified"
    36    f = open(UCD_dir + "/" + 'PropertyAliases.txt')
    37    lines = f.readlines()
    38    for t in lines:
    39       m = UCD_property_section_regexp.match(t)
    40       if m:
    41         property_kind = m.group(1)
    42       if UCD_skip.match(t): continue  # skip comment and blank lines
    43       m = UCD_property_alias_regexp.match(t)
    44       if not m: raise Exception("Unknown property alias syntax: %s" % t)
    45       prop_enum = m.group(1).lower()
    46       prop_preferred_full_name = m.group(2)
    47       prop_extra = m.group(3)
    48       prop_aliases = re.findall("[-A-Za-z_0-9]+", prop_extra)
    49       property_enum_name_list.append(prop_enum)
    50       full_name_map[prop_enum] = prop_preferred_full_name
    51       property_lookup_map[canonicalize(prop_enum)] = prop_enum
    52       property_lookup_map[canonicalize(prop_preferred_full_name)] = prop_enum
    53       for a in prop_aliases: property_lookup_map[canonicalize(a)] = prop_enum
    54       property_kind_map[prop_enum] = property_kind
    55    return (property_enum_name_list, full_name_map, property_lookup_map, property_kind_map)
    5629
    5730trivial_name_char_re = re.compile('[-_\s]')
     
    6639%s
    6740  };
    68   const std::string property_full_name[] = {
     41  const std::vector<std::string> property_full_name = {
    6942%s
    7043  };
     
    7649
    7750
    78 def generate_PropertyAliases_h(property_enum_name_list, full_name_map, property_lookup_map):
    79    f = cformat.open_header_file_for_write('PropertyAliases')
    80    cformat.write_imports(f, ["<string>", "<unordered_map>"])
    81    #enum_text = multiline_join(property_enum_name_list, 4, ',')
    82    enum_text = cformat.multiline_fill(property_enum_name_list, ',')
    83    #full_name_text = multiline_join(['"%s"' % full_name_map[e] for e in property_enum_name_list], 2, ',')
    84    full_name_text = cformat.multiline_fill(['"%s"' % full_name_map[e] for e in property_enum_name_list], ',')
    85    #map_text = multiline_join(['{"%s", %s}' % (k, property_lookup_map[k]) for k in sorted(property_lookup_map.keys())], 2,',')
    86    map_text = cformat.multiline_fill(['{"%s", %s}' % (k, property_lookup_map[k]) for k in sorted(property_lookup_map.keys())], ',')
    87    f.write(PropertyAliases_template % (enum_text, full_name_text, map_text))
    88    cformat.close_header_file(f)
    89 
    90 #
    91 #  UCD Property File Format 2: property value aliases
    92 #  PropertyValueAliases.txt
    93 #
    94 #  This file records value aliases for property values for
    95 #  each enumerated property, with the following additional notes:
    96 #  (1) The corresponding integer value of the enum constant is
    97 #      also specified for ccc (second field).
    98 #  (2) The Age property is a numeric type which has decimal float
    99 #      values as the enum constants: these won't be legal in enum syntax.
    100 #  (3) Binary properties also have enumerated values and aliases listed,
    101 #      although this is redundant, because all binary properties have the
    102 #      same value space.
    103 #
    104 
    105 UCD_property_value_alias_regexp = re.compile("^([-A-Za-z_0-9.]+)\s*;\s*([-A-Za-z_0-9.]+)\s*;\s*([-A-Za-z_0-9.]+)([^#]*)")
    106 
    107 def parse_PropertyValueAlias_txt(property_lookup_map):
    108     property_value_list = {}
    109     property_value_enum_integer = {}
    110     property_value_full_name_map = {}
    111     property_value_lookup_map = {}
    112     f = open(UCD_dir + "/" + 'PropertyValueAliases.txt')
    113     lines = f.readlines()
    114     for t in lines:
    115         if UCD_skip.match(t): continue  # skip comment and blank lines
    116         m = UCD_property_value_alias_regexp.match(t)
    117         if not m: raise Exception("Unknown property value alias syntax: %s" % t)
    118         prop_code = canonicalize(m.group(1))
    119         if not property_lookup_map.has_key(prop_code): raise Exception("Property code: '%s' is unknown" % prop_code)
    120         else: prop_code = property_lookup_map[prop_code]
    121         if not property_value_list.has_key(prop_code):
    122           property_value_list[prop_code] = []
    123           property_value_enum_integer[prop_code] = {}
    124           property_value_full_name_map[prop_code] = {}
    125           property_value_lookup_map[prop_code] = {}
    126           enum_integer = 0
    127         # Special case for ccc: second field is enum integer value
    128         if prop_code == 'ccc':
    129           enum_integer = int(m.group(2))
    130           value_enum = m.group(3)
    131           extra = m.group(4)
    132           extra_list = re.findall("[-A-Za-z_0-9.]+", extra)
    133           value_preferred_full_name = extra_list[0]
    134           value_aliases = extra_list[1:]
    135         # Special case for age: second field is numeric, third field is enum
    136         # treat numeric value as an alias string
    137         elif prop_code == 'age':
    138           value_enum = m.group(3)
    139           value_preferred_full_name = m.group(3)
    140           extra = m.group(4)
    141           value_aliases = [m.group(2)] + re.findall("[-A-Za-z_0-9]+", extra)
    142         else:
    143           value_enum = m.group(2)
    144           value_preferred_full_name = m.group(3)
    145           extra = m.group(4)
    146           value_aliases = re.findall("[-A-Za-z_0-9]+", extra)
    147         property_value_list[prop_code].append(value_enum)
    148         property_value_enum_integer[prop_code][value_enum] = enum_integer
    149         enum_integer += 1
    150         property_value_full_name_map[prop_code][value_enum] = value_preferred_full_name
    151         property_value_lookup_map[prop_code][canonicalize(value_enum)] = value_enum
    152         property_value_lookup_map[prop_code][canonicalize(value_preferred_full_name)] = value_enum
    153         for a in value_aliases: property_value_lookup_map[prop_code][canonicalize(a)] = value_enum
    154     return (property_value_list, property_value_enum_integer, property_value_full_name_map, property_value_lookup_map)
    15551
    15652
     
    17268  namespace %s {
    17369    enum value_t {
    174 %s};
    175     const std::string value_names[] = {
    176 %s};
    177     const std::unordered_map<std::string, int> aliases_only_map[] = {
    178 %s};
     70      %s};
     71    const std::vector<std::string> value_names = {
     72      %s};
     73    const std::unordered_map<std::string, int> aliases_only_map = {
     74      %s};
    17975  }
    18076"""
     
    19288
    19389
    194 def generate_PropertyValueAliases_h(property_enum_name_list, property_value_list, property_value_enum_integer, property_value_full_name_map, property_value_lookup_map):
    195    f = cformat.open_header_file_for_write('PropertyValueAliases')
    196    cformat.write_imports(f, ["<string>", "<unordered_map>", '"PropertyAliases.h"'])
    197    #  Generate the aliases for all Binary properties.
    198    full_name_text = cformat.multiline_fill(['"No"', '"Yes"'], ',', 6)
    199    binary_map_text = cformat.multiline_fill(['{"n", N}', '{"y", Y}', '{"no", N}', '{"yes", Y}', '{"f", N}', '{"t", Y}', '{"false", N}', '{"true", Y}'], ',', 6)
    200    #
    201    for p in property_enum_name_list:
    202      if property_value_list.has_key(p):
    203        if not property_value_list[p] == ['N', 'Y']:
    204          enum_text = cformat.multiline_fill(property_value_list[p], ',', 6)
    205          if p == 'ccc': # Special case: add numeric value information for ccc.
    206            enum_text += r"""
    207     };
    208     const uint8_t enum_val[] = {
    209 """
    210            enum_text += cformat.multiline_fill(["%s" % (property_value_enum_integer[p][e]) for e in property_value_list['ccc']], ',', 6)
    211          full_names = [property_value_full_name_map[p][e] for e in property_value_list[p]]
    212          full_name_text = cformat.multiline_fill(['"%s"' % name for name in full_names], ',', 6)
    213          canon_full_names = [canonicalize(name) for name in full_names]
    214          aliases_only = [k for k in property_value_lookup_map[p].keys() if not canonicalize(k) in canon_full_names]
    215          map_text = cformat.multiline_fill(['{"%s", %s::%s}' % (k, p.upper(), property_value_lookup_map[p][k]) for k in sorted(aliases_only)], ',', 6)
    216          f.write(EnumeratedProperty_template % (p.upper(), enum_text, full_name_text, map_text))
    217    cformat.close_header_file(f)
    21890
    21991
     
    268140   return (name_list_order, value_map)
    269141
    270 def generate_property_value_file(filename_root, property_code, canonical_property_value_map):
    271    (prop_values, value_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', canonical_property_value_map)
    272    f = cformat.open_header_file_for_write(os.path.basename(filename_root))
    273    cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"', '"PropertyValueAliases.h"', '"PropertyValueSets.h"'])
    274    f.write("\nusing namespace UCD;\n\n")
    275    print "%s bytes" % sum([value_map[v].bytes() for v in value_map.keys()])
    276    for v in prop_values:
    277      f.write(value_map[v].showC('value_sets[%s][%s::%s]' % (property_code, property_code.upper(), v)))
    278    cformat.close_header_file(f)
    279142   
    280 def generate_binary_properties_file(filename_root, canonical_property_name_map):
    281    (props, prop_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', canonical_property_name_map)
    282    f = cformat.open_header_file_for_write(os.path.basename(filename_root))
    283    cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"', '"PropertyValueSets.h"'])
    284    f.write("\nusing namespace UCD;\n\n")
    285    print "%s bytes" % sum([prop_map[p].bytes() for p in prop_map.keys()])
     143def generate_binary_property_stubs(props):
     144   f = cformat.open_header_file_for_write('PropertyValueStubs')
     145   cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"'])
     146   f.write("\nnamespace UCD {\n")
    286147   for p in sorted(props):
    287      f.write(prop_map[p].showC('value_sets[%s][0]' % (p)))
     148     f.write("  namespace %s {\n    const UnsupportedPropertyObject property_object{%s, BinaryProperty};\n  }\n" % (p.upper(), p))
     149   f.write("}\n\n")
    288150   cformat.close_header_file(f)
    289151     
    290152CodepointProperties = ['scf', 'slc', 'suc', 'stc']
    291 
    292 def generate_PropertyObjectArray_h(property_enum_name_list, property_kind_map):
    293    f = cformat.open_header_file_for_write('PropertyObjectArray')
    294    cformat.write_imports(f, ['"PropertyObjects.h"', '"PropertyAliases.h"'])
    295    objlist = []
    296    for p in property_enum_name_list:
    297      k = property_kind_map[p]
    298      if k == 'Enumerated' or k == 'Catalog':
    299         objlist.append("EnumeratedPropertyObject(UCD::%s, UCD::%s::value_names, UCD::%s::aliases_only_map)" % (p, p.upper(), p.upper()))
    300      elif k == 'String':
    301         if p in CodepointProperties:
    302           objlist.append("CodepointPropertyObject(UCD::%s)" % p)
    303         else:
    304           objlist.append("StringPropertyObject(UCD::%s)" % p)
    305      else:
    306         objlist.append("%sPropertyObject(UCD::%s)" % (k, p))
    307    f.write("\nUnicodePropertyObject property_object_array[] = {\n    ")
    308    f.write(",\n    ".join(objlist) + '};\n')
    309    cformat.close_header_file(f)
    310153
    311154def generate_ScriptExtensions_h():
     
    323166   print "%s bytes" % sum([map2[k].bytes() for k in map2.keys()])
    324167   for k in sorted(map2.keys()):
    325      f.write(map2[k].showC('value_sets[scx][SC::%s]' % k.lower()))
     168     pass#f.write(map2[k].showC('value_sets[scx][SC::%s]' % k.lower()))
    326169   cformat.close_header_file(f)
    327170
    328171
     172class UCD_generator():
     173        def __init__(self, UCD_dir):
     174                self.UCD_dir = UCD_dir
     175                self.supported_props = []
     176                self.property_data_headers = []
     177
     178        def parse_PropertyAlias_txt(self):
     179           self.property_enum_name_list = []
     180           self.full_name_map = {}
     181           self.property_lookup_map = {}
     182           self.property_kind_map = {}
     183           property_kind = "unspecified"
     184           f = open(self.UCD_dir + "/" + 'PropertyAliases.txt')
     185           lines = f.readlines()
     186           for t in lines:
     187              m = UCD_property_section_regexp.match(t)
     188              if m:
     189                property_kind = m.group(1)
     190              if UCD_skip.match(t): continue  # skip comment and blank lines
     191              m = UCD_property_alias_regexp.match(t)
     192              if not m: raise Exception("Unknown property alias syntax: %s" % t)
     193              prop_enum = m.group(1).lower()
     194              prop_preferred_full_name = m.group(2)
     195              prop_extra = m.group(3)
     196              prop_aliases = re.findall("[-A-Za-z_0-9]+", prop_extra)
     197              self.property_enum_name_list.append(prop_enum)
     198              self.full_name_map[prop_enum] = prop_preferred_full_name
     199              self.property_lookup_map[canonicalize(prop_enum)] = prop_enum
     200              self.property_lookup_map[canonicalize(prop_preferred_full_name)] = prop_enum
     201              for a in prop_aliases: self.property_lookup_map[canonicalize(a)] = prop_enum
     202              self.property_kind_map[prop_enum] = property_kind
     203
     204        def generate_PropertyAliases_h(self):
     205           f = cformat.open_header_file_for_write('PropertyAliases')
     206           cformat.write_imports(f, ["<string>", "<vector>", "<unordered_map>"])
     207           enum_text = cformat.multiline_fill(self.property_enum_name_list, ',')
     208           full_name_text = cformat.multiline_fill(['"%s"' % self.full_name_map[e] for e in self.property_enum_name_list], ',')
     209           map_text = cformat.multiline_fill(['{"%s", %s}' % (k, self.property_lookup_map[k]) for k in sorted(self.property_lookup_map.keys())], ',')
     210           f.write(PropertyAliases_template % (enum_text, full_name_text, map_text))
     211           cformat.close_header_file(f)
     212
     213#
     214#  UCD Property File Format 2: property value aliases
     215#  PropertyValueAliases.txt
     216#
     217#  This file records value aliases for property values for
     218#  each enumerated property, with the following additional notes:
     219#  (1) The corresponding integer value of the enum constant is
     220#      also specified for ccc (second field).
     221#  (2) The Age property is a numeric type which has decimal float
     222#      values as the enum constants: these won't be legal in enum syntax.
     223#  (3) Binary properties also have enumerated values and aliases listed,
     224#      although this is redundant, because all binary properties have the
     225#      same value space.
     226#
     227
     228        def parse_PropertyValueAlias_txt(self):
     229            UCD_property_value_alias_regexp = re.compile("^([-A-Za-z_0-9.]+)\s*;\s*([-A-Za-z_0-9.]+)\s*;\s*([-A-Za-z_0-9.]+)([^#]*)")
     230            self.property_value_list = {}
     231            self.property_value_enum_integer = {}
     232            self.property_value_full_name_map = {}
     233            self.property_value_lookup_map = {}
     234            f = open(self.UCD_dir + "/" + 'PropertyValueAliases.txt')
     235            lines = f.readlines()
     236            for t in lines:
     237                if UCD_skip.match(t): continue  # skip comment and blank lines
     238                m = UCD_property_value_alias_regexp.match(t)
     239                if not m: raise Exception("Unknown property value alias syntax: %s" % t)
     240                prop_code = canonicalize(m.group(1))
     241                if not self.property_lookup_map.has_key(prop_code): raise Exception("Property code: '%s' is unknown" % prop_code)
     242                else: prop_code = self.property_lookup_map[prop_code]
     243                if not self.property_value_list.has_key(prop_code):
     244                  self.property_value_list[prop_code] = []
     245                  self.property_value_enum_integer[prop_code] = {}
     246                  self.property_value_full_name_map[prop_code] = {}
     247                  self.property_value_lookup_map[prop_code] = {}
     248                  enum_integer = 0
     249                # Special case for ccc: second field is enum integer value
     250                if prop_code == 'ccc':
     251                  enum_integer = int(m.group(2))
     252                  value_enum = m.group(3)
     253                  extra = m.group(4)
     254                  extra_list = re.findall("[-A-Za-z_0-9.]+", extra)
     255                  value_preferred_full_name = extra_list[0]
     256                  value_aliases = extra_list[1:]
     257                # Special case for age: second field is numeric, third field is enum
     258                # treat numeric value as an alias string
     259                elif prop_code == 'age':
     260                  value_enum = m.group(3)
     261                  value_preferred_full_name = m.group(3)
     262                  extra = m.group(4)
     263                  value_aliases = [m.group(2)] + re.findall("[-A-Za-z_0-9]+", extra)
     264                else:
     265                  value_enum = m.group(2)
     266                  value_preferred_full_name = m.group(3)
     267                  extra = m.group(4)
     268                  value_aliases = re.findall("[-A-Za-z_0-9]+", extra)
     269                self.property_value_list[prop_code].append(value_enum)
     270                self.property_value_enum_integer[prop_code][value_enum] = enum_integer
     271                enum_integer += 1
     272                self.property_value_full_name_map[prop_code][value_enum] = value_preferred_full_name
     273                self.property_value_lookup_map[prop_code][canonicalize(value_enum)] = value_enum
     274                self.property_value_lookup_map[prop_code][canonicalize(value_preferred_full_name)] = value_enum
     275                for a in value_aliases: self.property_value_lookup_map[prop_code][canonicalize(a)] = value_enum
     276
     277
     278        def generate_PropertyValueAliases_h(self):
     279           f = cformat.open_header_file_for_write('PropertyValueAliases')
     280           cformat.write_imports(f, ["<string>", "<unordered_map>", '"PropertyAliases.h"'])
     281           f.write("namespace UCD {\n")
     282           #  Generate the aliases for all Binary properties.
     283           enum_text = cformat.multiline_fill(['N', 'Y'], ',', 6)
     284           full_name_text = cformat.multiline_fill(['"No"', '"Yes"'], ',', 6)
     285           binary_map_text = cformat.multiline_fill(['{"n", N}', '{"y", Y}', '{"no", N}', '{"yes", Y}', '{"f", N}', '{"t", Y}', '{"false", N}', '{"true", Y}'], ',', 6)
     286           f.write(EnumeratedProperty_template % ('Binary', enum_text, full_name_text, binary_map_text))
     287           #
     288           for p in self.property_enum_name_list:
     289             if self.property_value_list.has_key(p):
     290               if not self.property_kind_map[p] == 'Binary':
     291                 enum_text = cformat.multiline_fill(self.property_value_list[p], ',', 6)
     292                 if p == 'ccc': # Special case: add numeric value information for ccc.
     293                   enum_text += r"""
     294            };
     295            const uint8_t enum_val[] = {
     296        """
     297                   enum_text += "      " + cformat.multiline_fill(["%s" % (self.property_value_enum_integer[p][e]) for e in self.property_value_list['ccc']], ',', 6)
     298                 full_names = [self.property_value_full_name_map[p][e] for e in self.property_value_list[p]]
     299                 full_name_text = cformat.multiline_fill(['"%s"' % name for name in full_names], ',', 6)
     300                 canon_full_names = [canonicalize(name) for name in full_names]
     301                 aliases_only = [k for k in self.property_value_lookup_map[p].keys() if not canonicalize(k) in canon_full_names]
     302                 map_text = cformat.multiline_fill(['{"%s", %s::%s}' % (k, p.upper(), self.property_value_lookup_map[p][k]) for k in sorted(aliases_only)], ',', 6)
     303                 f.write(EnumeratedProperty_template % (p.upper(), enum_text, full_name_text, map_text))
     304           f.write("}\n")
     305           cformat.close_header_file(f)
     306
     307     
     308        def generate_property_value_file(self, filename_root, property_code):
     309           canonical_property_value_map = self.property_value_lookup_map[property_code]
     310           (prop_values, value_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', canonical_property_value_map)
     311           basename = os.path.basename(filename_root)
     312           f = cformat.open_header_file_for_write(os.path.basename(filename_root))
     313           cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"', '"PropertyValueAliases.h"'])
     314           f.write("\nnamespace UCD {\n")
     315           print "%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()]))
     316           f.write("  namespace %s {\n" % property_code.upper())
     317           for v in prop_values:
     318             f.write("    const UnicodeSet %s_Set \n" % v.lower())
     319             f.write(value_map[v].showC(6) + ";\n")
     320           set_list = ['%s_Set' % v.lower() for v in prop_values]
     321           f.write("    const EnumeratedPropertyObject property_object\n")
     322           f.write("      {%s,\n" % property_code)
     323           f.write("       %s::value_names,\n" % property_code.upper())
     324           f.write("       %s::aliases_only_map,\n" % property_code.upper())
     325           f.write("       {")
     326           f.write(cformat.multiline_fill(set_list, ',', 8))
     327           f.write("\n       }};\n  }\n}\n")
     328           cformat.close_header_file(f)
     329           self.supported_props.append(property_code)
     330           self.property_data_headers.append(basename)
     331
     332        def generate_binary_properties_file(self, filename_root):
     333           (props, prop_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', self.property_lookup_map)
     334           basename = os.path.basename(filename_root)
     335           f = cformat.open_header_file_for_write(basename)
     336           cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"'])
     337           f.write("\nnamespace UCD {\n")
     338           print "%s: %s bytes" % (basename, sum([prop_map[p].bytes() for p in prop_map.keys()]))
     339           for p in sorted(props):
     340             f.write("  namespace %s {\n    const UnicodeSet codepoint_set \n" % p.upper())
     341             f.write(prop_map[p].showC(6) + ";\n")
     342             f.write("    const BinaryPropertyObject property_object{%s, codepoint_set};\n  }\n" % p)
     343           f.write("}\n\n")
     344           cformat.close_header_file(f)
     345           self.supported_props += props
     346           self.property_data_headers.append(basename)
     347
     348        def generate_PropertyObjectTable_h(self):
     349           f = cformat.open_header_file_for_write('PropertyObjectTable')
     350           cformat.write_imports(f, ['"PropertyObjects.h"', '"PropertyAliases.h"'])
     351           cformat.write_imports(f, ['"%s.h"' % fname for fname in self.property_data_headers])
     352           f.write("\nnamespace UCD {\n")
     353           objlist = []
     354           for p in self.property_enum_name_list:
     355             k = self.property_kind_map[p]
     356             if (k == 'Enumerated' or k == 'Catalog') and p in self.supported_props:
     357                objlist.append("&%s::property_object" % p.upper())
     358             elif k == 'String':
     359                if p in CodepointProperties:
     360                  objlist.append("new UnsupportedPropertyObject(%s, CodepointProperty)" % p)
     361                else:
     362                  objlist.append("new UnsupportedPropertyObject(%s, StringProperty)" % p)
     363             elif k == 'Binary' and p in self.supported_props:
     364                objlist.append("&%s::property_object" % p.upper())
     365             else:
     366                objlist.append("new UnsupportedPropertyObject(%s, %sProperty)" % (p, k))
     367           f.write("\n  const PropertyObject* property_object_table[] = {\n    ")
     368           f.write(",\n    ".join(objlist) + '  };\n}\n')
     369           cformat.close_header_file(f)
     370
     371
    329372
    330373def UCD_main():
     374   ucd = UCD_generator(UCD_dir)
     375
    331376   # First parse all property names and their aliases
    332    (property_enum_name_list, full_name_map, property_lookup_map, property_kind_map) = parse_PropertyAlias_txt()
    333    generate_PropertyAliases_h(property_enum_name_list, full_name_map, property_lookup_map)
    334    # Next parse all property value names and their aliases
    335    (property_value_list, property_value_enum_integer, property_value_full_name_map, property_value_lookup_map) = parse_PropertyValueAlias_txt(property_lookup_map)
    336    generate_PropertyValueAliases_h(property_enum_name_list, property_value_list, property_value_enum_integer, property_value_full_name_map, property_value_lookup_map)
    337    #
    338    generate_PropertyValueSets_h(property_enum_name_list, property_value_list, property_value_enum_integer, property_value_full_name_map, property_value_lookup_map)
    339    #
    340    generate_PropertyObjectArray_h(property_enum_name_list, property_kind_map)
    341    #
    342    # Blocks
    343    generate_property_value_file('Blocks', 'blk', property_value_lookup_map['blk'])
     377   ucd.parse_PropertyAlias_txt()
     378   #
     379   # Generate the PropertyAliases.h file to define all the Unicode property_t enum
     380   # and the basic property information.
     381   ucd.generate_PropertyAliases_h()
     382   #
     383   # Next parse all property value names and their aliases.  Generate the data.
     384   ucd.parse_PropertyValueAlias_txt()
     385   ucd.generate_PropertyValueAliases_h()
     386   #
     387   # The Block property
     388   ucd.generate_property_value_file('Blocks', 'blk')
    344389   #
    345390   # Scripts
    346    generate_property_value_file('Scripts', 'sc', property_value_lookup_map['sc'])
     391   ucd.generate_property_value_file('Scripts', 'sc')
    347392   #
    348393   # Script Extensions
    349    generate_ScriptExtensions_h()
     394   #generate_ScriptExtensions_h()
    350395   #
    351396   # General Category
    352    generate_property_value_file('extracted/DerivedGeneralCategory', 'gc', property_value_lookup_map['gc'])
    353 
     397   ucd.generate_property_value_file('extracted/DerivedGeneralCategory', 'gc')
    354398   #
    355399   # Binary properties from PropList.txt
    356    generate_binary_properties_file('PropList', property_lookup_map)
     400   ucd.generate_binary_properties_file('PropList')
    357401   #
    358402   # Binary properties from DerivedCoreProperties.txt
    359    generate_binary_properties_file('DerivedCoreProperties', property_lookup_map)
     403   ucd.generate_binary_properties_file('DerivedCoreProperties')
     404   #
    360405   #
    361406   # LineBreak types
    362    generate_property_value_file('LineBreak', 'lb', property_value_lookup_map['lb'])
     407   ucd.generate_property_value_file('LineBreak', 'lb')
    363408   #
    364409   # East Asian Width
    365    generate_property_value_file('EastAsianWidth', 'ea', property_value_lookup_map['ea'])
     410   ucd.generate_property_value_file('EastAsianWidth', 'ea')
    366411   #
    367412   # Hangul Syllable Type
    368    generate_property_value_file('HangulSyllableType', 'hst', property_value_lookup_map['hst'])
    369 
     413   ucd.generate_property_value_file('HangulSyllableType', 'hst')
    370414   #
    371415   # Jamo Short Name - AAARGH - property value for 110B is an empty string!!!!!  - Not in PropertyValueAliases.txt
    372    # generate_property_value_file('Jamo', 'jsn', property_value_lookup_map['jsn'])
    373 
     416   # ucd.generate_property_value_file('Jamo', 'jsn')
     417   #
     418   # Stubs
     419   #
     420   ucd.generate_PropertyObjectTable_h()
    374421
    375422if __name__ == "__main__":
  • proto/charsetcompiler/UCD/cformat.py

    r4183 r4186  
    3737      length_this_line += len(item_list[0]) + len(sep_with_space)
    3838      item_list = item_list[1:]
    39     lines += (" " * indent) + sep_with_space.join(items_this_line)
    40     if len(item_list) > 0: lines += separator + "\n"
     39    lines += sep_with_space.join(items_this_line)
     40    if len(item_list) > 0: lines += separator + "\n" + (" " * indent)
    4141  return lines
    4242
  • proto/charsetcompiler/UCD/unicode_set.py

    r4183 r4186  
    6161
    6262   # printing
    63    def showC(self, name, indent = 4):
     63   def showC(self, indent = 4):
    6464      hex_specifier =  "%%#0%ix" % (quad_bits/4 + 2)
    6565      runtype = {-1:"Full", 0:"Empty", 1: "Mixed"}
    6666      formatted_runs = ['{%s, %i}' % (runtype[r[0]], r[1]) for r in self.runs]
    6767      formatted_quads = [hex_specifier % q for q in self.quads]
    68       setrep = (" " * indent) + ("%s.runs = {" % name)
    69       setrep += cformat.multiline_fill(formatted_runs, ',', indent)
    70       setrep += '};\n'
    71       setrep += (" " * indent) + ("%s.quads = {\n" % name)
    72       setrep += cformat.multiline_fill(formatted_quads, ',', indent)
    73       setrep += '};\n'
     68      setrep = (" " * indent) + "{{"
     69      setrep += cformat.multiline_fill(formatted_runs, ',', indent+2)
     70      setrep += '},\n'
     71      setrep += (" " * indent) + " {"
     72      setrep += cformat.multiline_fill(formatted_quads, ',', indent+2)
     73      setrep += '},\n'
     74      setrep += (" " * indent) + " %i}" % self.quad_count
    7475      return setrep
    7576
Note: See TracChangeset for help on using the changeset viewer.