Changeset 4389


Ignore:
Timestamp:
Jan 2, 2015, 9:29:15 AM (4 years ago)
Author:
cameron
Message:

Generating binary properties, reformatting

Location:
proto/charsetcompiler
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/Makefile

    r4387 r4389  
    2828        python $(PABLO_COMPILER) Blocks.pablo -Z -t ucd_precompiled_blk.h -o src/precompiled_blk.h
    2929
     30core:
     31        python $(PABLO_COMPILER) DerivedCoreProperties.pablo -Z -t ucd_precompiled_derivedcoreproperties.h -o src/precompiled_derivedcoreproperties.h
     32
  • proto/charsetcompiler/UCD/UCD_properties.py

    r4387 r4389  
    55#
    66# Robert D. Cameron
    7 # September 10, 2014
     7# January 2, 2015
    88#
    99# Licensed under Open Software License 3.0.
     
    2121%s
    2222  };
     23  const std::vector<std::string> property_enum_name = {
     24%s
     25  };
    2326  const std::vector<std::string> property_full_name = {
    2427%s
     
    6265class UCD_generator():
    6366    def __init__(self):
    64             self.supported_props = []
    65             self.property_data_headers = []
    66             self.missing_specs = {}
     67        self.supported_props = []
     68        self.property_data_headers = []
     69        self.missing_specs = {}
    6770
    6871    def load_property_name_info(self):
    69        (self.property_enum_name_list, self.full_name_map, self.property_lookup_map, self.property_kind_map) = parse_PropertyAlias_txt()
     72        (self.property_enum_name_list, self.full_name_map, self.property_lookup_map, self.property_kind_map) = parse_PropertyAlias_txt()
    7073
    7174    def generate_PropertyAliases_h(self):
    72        f = cformat.open_header_file_for_write('PropertyAliases')
    73        cformat.write_imports(f, ["<string>", "<vector>", "<unordered_map>"])
    74        enum_text = cformat.multiline_fill(self.property_enum_name_list, ',')
    75        full_name_text = cformat.multiline_fill(['"%s"' % self.full_name_map[e] for e in self.property_enum_name_list], ',')
    76        map_text = cformat.multiline_fill(['{"%s", %s}' % (k, self.property_lookup_map[k]) for k in sorted(self.property_lookup_map.keys())], ',')
    77        f.write(PropertyAliases_template % (enum_text, full_name_text, map_text))
    78        cformat.close_header_file(f)
     75        f = cformat.open_header_file_for_write('PropertyAliases')
     76        cformat.write_imports(f, ["<string>", "<vector>", "<unordered_map>"])
     77        enum_text = cformat.multiline_fill(self.property_enum_name_list, ',')
     78        enum_text2 = cformat.multiline_fill(['"%s"' % e for e in self.property_enum_name_list], ',')
     79        full_name_text = cformat.multiline_fill(['"%s"' % self.full_name_map[e] for e in self.property_enum_name_list], ',')
     80        map_text = cformat.multiline_fill(['{"%s", %s}' % (k, self.property_lookup_map[k]) for k in sorted(self.property_lookup_map.keys())], ',')
     81        f.write(PropertyAliases_template % (enum_text, enum_text2, full_name_text, map_text))
     82        cformat.close_header_file(f)
    7983
    8084    def load_property_value_info(self):
    81        (self.property_value_list, self.property_value_enum_integer, self.property_value_full_name_map, self.property_value_lookup_map, self.missing_specs) = parse_PropertyValueAlias_txt(self.property_lookup_map)
     85        (self.property_value_list, self.property_value_enum_integer, self.property_value_full_name_map, self.property_value_lookup_map, self.missing_specs) = parse_PropertyValueAlias_txt(self.property_lookup_map)
    8286
    8387
    8488    def generate_PropertyValueAliases_h(self):
    85        f = cformat.open_header_file_for_write('PropertyValueAliases')
    86        cformat.write_imports(f, ["<string>", "<unordered_map>", '"PropertyAliases.h"'])
    87        f.write("namespace UCD {\n")
    88        #  Generate the aliases for all Binary properties.
    89        enum_text = cformat.multiline_fill(['N', 'Y'], ',', 6)
    90        enum_names = cformat.multiline_fill(['"N"', '"Y"'], ',', 6)
    91        full_name_text = cformat.multiline_fill(['"No"', '"Yes"'], ',', 6)
    92        binary_map_text = cformat.multiline_fill(['{"n", N}', '{"y", Y}', '{"no", N}', '{"yes", Y}', '{"f", N}', '{"t", Y}', '{"false", N}', '{"true", Y}'], ',', 6)
    93        f.write(EnumeratedProperty_template % ('Binary', enum_text, enum_names, full_name_text, binary_map_text))
    94        #
    95        for p in self.property_enum_name_list:
    96          if self.property_value_list.has_key(p):
    97            if not self.property_kind_map[p] == 'Binary':
    98              enum_text = cformat.multiline_fill(self.property_value_list[p], ',', 6)
    99              enum_names = cformat.multiline_fill(['"%s"' % s for s in self.property_value_list[p]], ',', 6)
    100              if p == 'ccc': # Special case: add numeric value information for ccc.
    101                enum_text += r"""
     89        f = cformat.open_header_file_for_write('PropertyValueAliases')
     90        cformat.write_imports(f, ["<string>", "<unordered_map>", '"PropertyAliases.h"'])
     91        f.write("namespace UCD {\n")
     92        #  Generate the aliases for all Binary properties.
     93        enum_text = cformat.multiline_fill(['N', 'Y'], ',', 6)
     94        enum_names = cformat.multiline_fill(['"N"', '"Y"'], ',', 6)
     95        full_name_text = cformat.multiline_fill(['"No"', '"Yes"'], ',', 6)
     96        binary_map_text = cformat.multiline_fill(['{"n", N}', '{"y", Y}', '{"no", N}', '{"yes", Y}', '{"f", N}', '{"t", Y}', '{"false", N}', '{"true", Y}'], ',', 6)
     97        f.write(EnumeratedProperty_template % ('Binary', enum_text, enum_names, full_name_text, binary_map_text))
     98        #
     99        for p in self.property_enum_name_list:
     100           if self.property_value_list.has_key(p):
     101              if not self.property_kind_map[p] == 'Binary':
     102                  enum_text = cformat.multiline_fill(self.property_value_list[p], ',', 6)
     103                  enum_names = cformat.multiline_fill(['"%s"' % s for s in self.property_value_list[p]], ',', 6)
     104                  if p == 'ccc': # Special case: add numeric value information for ccc.
     105                      enum_text += r"""
    102106        };
    103107        const uint8_t enum_val[] = {
    104108    """
    105                enum_text += "      " + cformat.multiline_fill(["%s" % (self.property_value_enum_integer[p][e]) for e in self.property_value_list['ccc']], ',', 6)
    106              full_names = [self.property_value_full_name_map[p][e] for e in self.property_value_list[p]]
    107              full_name_text = cformat.multiline_fill(['"%s"' % name for name in full_names], ',', 6)
    108              canon_full_names = [canonicalize(name) for name in full_names]
    109              canon_enums = [canonicalize(e) for e in self.property_value_list[p]]
    110              canon_keys = [canonicalize(k) for k in self.property_value_lookup_map[p].keys()]
    111              aliases_only = [k for k in canon_keys if not k in canon_enums + canon_full_names]
    112              map_text = cformat.multiline_fill(['{"%s", %s_ns::%s}' % (k, p.upper(), self.property_value_lookup_map[p][k]) for k in sorted(aliases_only)], ',', 6)
    113              f.write(EnumeratedProperty_template % (p.upper(), enum_text, enum_names, full_name_text, map_text))
    114        f.write("}\n")
    115        cformat.close_header_file(f)
     109                      enum_text += "      " + cformat.multiline_fill(["%s" % (self.property_value_enum_integer[p][e]) for e in self.property_value_list['ccc']], ',', 6)
     110                  full_names = [self.property_value_full_name_map[p][e] for e in self.property_value_list[p]]
     111                  full_name_text = cformat.multiline_fill(['"%s"' % name for name in full_names], ',', 6)
     112                  canon_full_names = [canonicalize(name) for name in full_names]
     113                  canon_enums = [canonicalize(e) for e in self.property_value_list[p]]
     114                  canon_keys = [canonicalize(k) for k in self.property_value_lookup_map[p].keys()]
     115                  aliases_only = [k for k in canon_keys if not k in canon_enums + canon_full_names]
     116                  map_text = cformat.multiline_fill(['{"%s", %s_ns::%s}' % (k, p.upper(), self.property_value_lookup_map[p][k]) for k in sorted(aliases_only)], ',', 6)
     117                  f.write(EnumeratedProperty_template % (p.upper(), enum_text, enum_names, full_name_text, map_text))
     118        f.write("}\n")
     119        cformat.close_header_file(f)
    116120
    117121 
    118122    def generate_property_value_file(self, filename_root, property_code, default_value = None):
    119        vlist = self.property_value_list[property_code]
    120        canon_map = self.property_value_lookup_map[property_code]
    121        (prop_values, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt', default_value)
    122        basename = os.path.basename(filename_root)
    123        f = cformat.open_header_file_for_write(os.path.basename(filename_root))
    124        cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"', '"PropertyValueAliases.h"'])
    125        f.write("\nnamespace UCD {\n")
    126        f.write("  namespace %s_ns {\n" % property_code.upper())
    127        for v in self.property_value_list[property_code]:
    128          f.write("    const UnicodeSet %s_Set \n" % v.lower())
    129          f.write(value_map[v].showC(6) + ";\n")
    130        print "%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()]))
    131        set_list = ['%s_Set' % v.lower() for v in self.property_value_list[property_code]]
    132        f.write("    EnumeratedPropertyObject property_object\n")
    133        f.write("      {%s,\n" % property_code)
    134        f.write("       %s_ns::enum_names,\n" % property_code.upper())
    135        f.write("       %s_ns::value_names,\n" % property_code.upper())
    136        f.write("       %s_ns::aliases_only_map,\n" % property_code.upper())
    137        f.write("       {")
    138        f.write(cformat.multiline_fill(set_list, ',', 8))
    139        f.write("\n       }};\n  }\n}\n")
    140        cformat.close_header_file(f)
    141        self.supported_props.append(property_code)
    142        self.property_data_headers.append(basename)
     123        vlist = self.property_value_list[property_code]
     124        canon_map = self.property_value_lookup_map[property_code]
     125        (prop_values, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt', default_value)
     126        basename = os.path.basename(filename_root)
     127        f = cformat.open_header_file_for_write(os.path.basename(filename_root))
     128        cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"', '"PropertyValueAliases.h"'])
     129        f.write("\nnamespace UCD {\n")
     130        f.write("  namespace %s_ns {\n" % property_code.upper())
     131        for v in self.property_value_list[property_code]:
     132            f.write("    const UnicodeSet %s_Set \n" % v.lower())
     133            f.write(value_map[v].showC(6) + ";\n")
     134        print "%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()]))
     135        set_list = ['%s_Set' % v.lower() for v in self.property_value_list[property_code]]
     136        f.write("    EnumeratedPropertyObject property_object\n")
     137        f.write("      {%s,\n" % property_code)
     138        f.write("       %s_ns::enum_names,\n" % property_code.upper())
     139        f.write("       %s_ns::value_names,\n" % property_code.upper())
     140        f.write("       %s_ns::aliases_only_map,\n" % property_code.upper())
     141        f.write("       {")
     142        f.write(cformat.multiline_fill(set_list, ',', 8))
     143        f.write("\n       }};\n  }\n}\n")
     144        cformat.close_header_file(f)
     145        self.supported_props.append(property_code)
     146        self.property_data_headers.append(basename)
    143147
    144148    def generate_ScriptExtensions_h(self):
    145        filename_root = 'ScriptExtensions'
    146        property_code = 'scx'
    147        (prop_values, value_map) = parse_ScriptExtensions_txt(self.property_value_list['sc'], self.property_value_lookup_map['sc'])
    148        basename = os.path.basename(filename_root)
    149        f = cformat.open_header_file_for_write(basename)
    150        cformat.write_imports(f, ["<vector>", '"PropertyAliases.h"', '"PropertyValueAliases.h"', '"unicode_set.h"'])
    151        f.write("\nnamespace UCD {\n")
    152        f.write("  namespace SCX_ns {\n")
    153        for v in self.property_value_list['sc']:
    154          f.write("    const UnicodeSet %s_Ext \n" % v.lower())
    155          f.write(value_map[v].showC(6) + ";\n")
    156        print "%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()]))
    157        set_list = ['%s_Ext' % v.lower() for v in self.property_value_list['sc']]
    158        f.write("    EnumeratedPropertyObject property_object\n")
    159        f.write("      {%s,\n" % property_code)
    160        f.write("       SC_ns::enum_names,\n")
    161        f.write("       SC_ns::value_names,\n")
    162        f.write("       SC_ns::aliases_only_map,\n")
    163        f.write("       {")
    164        f.write(cformat.multiline_fill(set_list, ',', 8))
    165        f.write("\n       }};\n  }\n}\n")
    166        cformat.close_header_file(f)
    167        self.supported_props.append('property_code')
    168        self.property_data_headers.append(basename)
     149        filename_root = 'ScriptExtensions'
     150        property_code = 'scx'
     151        (prop_values, value_map) = parse_ScriptExtensions_txt(self.property_value_list['sc'], self.property_value_lookup_map['sc'])
     152        basename = os.path.basename(filename_root)
     153        f = cformat.open_header_file_for_write(basename)
     154        cformat.write_imports(f, ["<vector>", '"PropertyAliases.h"', '"PropertyValueAliases.h"', '"unicode_set.h"'])
     155        f.write("\nnamespace UCD {\n")
     156        f.write("  namespace SCX_ns {\n")
     157        for v in self.property_value_list['sc']:
     158            f.write("    const UnicodeSet %s_Ext \n" % v.lower())
     159            f.write(value_map[v].showC(6) + ";\n")
     160        print "%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()]))
     161        set_list = ['%s_Ext' % v.lower() for v in self.property_value_list['sc']]
     162        f.write("    EnumeratedPropertyObject property_object\n")
     163        f.write("      {%s,\n" % property_code)
     164        f.write("       SC_ns::enum_names,\n")
     165        f.write("       SC_ns::value_names,\n")
     166        f.write("       SC_ns::aliases_only_map,\n")
     167        f.write("       {")
     168        f.write(cformat.multiline_fill(set_list, ',', 8))
     169        f.write("\n       }};\n  }\n}\n")
     170        cformat.close_header_file(f)
     171        self.supported_props.append('property_code')
     172        self.property_data_headers.append(basename)
    169173
    170174
    171175    def generate_binary_properties_file(self, filename_root):
    172        (props, prop_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', self.property_lookup_map)
    173        basename = os.path.basename(filename_root)
    174        f = cformat.open_header_file_for_write(basename)
    175        cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"'])
    176        f.write("\nnamespace UCD {\n")
    177        print "%s: %s bytes" % (basename, sum([prop_map[p].bytes() for p in prop_map.keys()]))
    178        for p in sorted(props):
    179          f.write("  namespace %s_ns {\n    const UnicodeSet codepoint_set \n" % p.upper())
    180          f.write(prop_map[p].showC(6) + ";\n")
    181          f.write("    BinaryPropertyObject property_object{%s, codepoint_set};\n  }\n" % p)
    182        f.write("}\n\n")
    183        cformat.close_header_file(f)
    184        self.supported_props += props
    185        self.property_data_headers.append(basename)
     176        (props, prop_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', self.property_lookup_map)
     177        basename = os.path.basename(filename_root)
     178        f = cformat.open_header_file_for_write(basename)
     179        cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"'])
     180        f.write("\nnamespace UCD {\n")
     181        print "%s: %s bytes" % (basename, sum([prop_map[p].bytes() for p in prop_map.keys()]))
     182        for p in sorted(props):
     183            f.write("  namespace %s_ns {\n    const UnicodeSet codepoint_set \n" % p.upper())
     184            f.write(prop_map[p].showC(6) + ";\n")
     185            f.write("    BinaryPropertyObject property_object{%s, codepoint_set};\n  }\n" % p)
     186        f.write("}\n\n")
     187        cformat.close_header_file(f)
     188        self.supported_props += props
     189        self.property_data_headers.append(basename)
    186190
    187191    def generate_PropertyObjectTable_h(self):
    188        f = cformat.open_header_file_for_write('PropertyObjectTable')
    189        cformat.write_imports(f, ['"PropertyObjects.h"', '"PropertyAliases.h"'])
    190        cformat.write_imports(f, ['"%s.h"' % fname for fname in self.property_data_headers])
    191        f.write("\nnamespace UCD {\n")
    192        objlist = []
    193        for p in self.property_enum_name_list:
    194          k = self.property_kind_map[p]
    195          if (k == 'Enumerated' or k == 'Catalog') and p in self.supported_props:
    196             objlist.append("&%s_ns::property_object" % p.upper())
    197          elif k == 'String':
    198             if p in CodepointProperties:
    199               objlist.append("new UnsupportedPropertyObject(%s, CodepointProperty)" % p)
     192        f = cformat.open_header_file_for_write('PropertyObjectTable')
     193        cformat.write_imports(f, ['"PropertyObjects.h"', '"PropertyAliases.h"'])
     194        cformat.write_imports(f, ['"%s.h"' % fname for fname in self.property_data_headers])
     195        f.write("\nnamespace UCD {\n")
     196        objlist = []
     197        for p in self.property_enum_name_list:
     198            k = self.property_kind_map[p]
     199            if (k == 'Enumerated' or k == 'Catalog') and p in self.supported_props:
     200                objlist.append("&%s_ns::property_object" % p.upper())
     201            elif k == 'String':
     202                if p in CodepointProperties:
     203                    objlist.append("new UnsupportedPropertyObject(%s, CodepointProperty)" % p)
     204                else:
     205                    objlist.append("new UnsupportedPropertyObject(%s, StringProperty)" % p)
     206            elif k == 'Binary' and p in self.supported_props:
     207                objlist.append("&%s_ns::property_object" % p.upper())
    200208            else:
    201               objlist.append("new UnsupportedPropertyObject(%s, StringProperty)" % p)
    202          elif k == 'Binary' and p in self.supported_props:
    203             objlist.append("&%s_ns::property_object" % p.upper())
    204          else:
    205             objlist.append("new UnsupportedPropertyObject(%s, %sProperty)" % (p, k))
    206        f.write("\n  PropertyObject* property_object_table[] = {\n    ")
    207        f.write(",\n    ".join(objlist) + '  };\n}\n')
    208        cformat.close_header_file(f)
     209                objlist.append("new UnsupportedPropertyObject(%s, %sProperty)" % (p, k))
     210        f.write("\n  PropertyObject* property_object_table[] = {\n    ")
     211        f.write(",\n    ".join(objlist) + '  };\n}\n')
     212        cformat.close_header_file(f)
    209213
    210214
    211215
    212216def UCD_main():
    213    ucd = UCD_generator()
    214 
    215    # First parse all property names and their aliases
    216    ucd.load_property_name_info()
    217    #
    218    # Generate the PropertyAliases.h file to define all the Unicode property_t enum
    219    # and the basic property information.
    220    ucd.generate_PropertyAliases_h()
    221    #
    222    # Next parse all property value names and their aliases.  Generate the data.
    223    ucd.load_property_value_info()
    224    ucd.generate_PropertyValueAliases_h()
    225    #
    226    # The Block property
    227    ucd.generate_property_value_file('Blocks', 'blk')
    228    #
    229    # Scripts
    230    ucd.generate_property_value_file('Scripts', 'sc')
    231    #
    232    # Script Extensions
    233    ucd.generate_ScriptExtensions_h()
    234    #
    235    # General Category
    236    ucd.generate_property_value_file('extracted/DerivedGeneralCategory', 'gc')
    237    #
    238    # Binary properties from PropList.txt
    239    ucd.generate_binary_properties_file('PropList')
    240    #
    241    # Binary properties from DerivedCoreProperties.txt
    242    ucd.generate_binary_properties_file('DerivedCoreProperties')
    243    #
    244    #
    245    # LineBreak types
    246    ucd.generate_property_value_file('LineBreak', 'lb')
    247    #
    248    # East Asian Width
    249    ucd.generate_property_value_file('EastAsianWidth', 'ea')
    250    #
    251    # Hangul Syllable Type
    252    ucd.generate_property_value_file('HangulSyllableType', 'hst')
    253    #
    254    # Jamo Short Name - AAARGH - property value for 110B is an empty string!!!!!  - Not in PropertyValueAliases.txt
    255    # ucd.generate_property_value_file('Jamo', 'jsn')
    256    #
    257    # Stubs
    258    #
    259    ucd.generate_PropertyObjectTable_h()
     217    ucd = UCD_generator()
     218
     219    # First parse all property names and their aliases
     220    ucd.load_property_name_info()
     221    #
     222    # Generate the PropertyAliases.h file to define all the Unicode property_t enum
     223    # and the basic property information.
     224    ucd.generate_PropertyAliases_h()
     225    #
     226    # Next parse all property value names and their aliases.  Generate the data.
     227    ucd.load_property_value_info()
     228    ucd.generate_PropertyValueAliases_h()
     229    #
     230    # The Block property
     231    ucd.generate_property_value_file('Blocks', 'blk')
     232    #
     233    # Scripts
     234    ucd.generate_property_value_file('Scripts', 'sc')
     235    #
     236    # Script Extensions
     237    ucd.generate_ScriptExtensions_h()
     238    #
     239    # General Category
     240    ucd.generate_property_value_file('extracted/DerivedGeneralCategory', 'gc')
     241    #
     242    # Binary properties from PropList.txt
     243    ucd.generate_binary_properties_file('PropList')
     244    #
     245    # Binary properties from DerivedCoreProperties.txt
     246    ucd.generate_binary_properties_file('DerivedCoreProperties')
     247    #
     248    #
     249    # LineBreak types
     250    ucd.generate_property_value_file('LineBreak', 'lb')
     251    #
     252    # East Asian Width
     253    ucd.generate_property_value_file('EastAsianWidth', 'ea')
     254    #
     255    # Hangul Syllable Type
     256    ucd.generate_property_value_file('HangulSyllableType', 'hst')
     257    #
     258    # Jamo Short Name - AAARGH - property value for 110B is an empty string!!!!!  - Not in PropertyValueAliases.txt
     259    # ucd.generate_property_value_file('Jamo', 'jsn')
     260    #
     261    # Stubs
     262    #
     263    ucd.generate_PropertyObjectTable_h()
    260264
    261265if __name__ == "__main__":
  • proto/charsetcompiler/UCD/cformat.py

    r4317 r4389  
    33#define %s
    44/*
    5  *  Copyright (c) 2014 International Characters, Inc.
     5 *  Copyright (c) 2015 International Characters, Inc.
    66 *  This software is licensed to the public under the Open Software License 3.0.
    77 *  icgrep is a trademark of International Characters, Inc.
  • proto/charsetcompiler/generate_UCD_property_functions.py

    r4387 r4389  
    44#
    55# Robert D. Cameron
    6 # December 28, 2014
     6# January 2, 2015
    77#
    88# Licensed under Open Software License 3.0.
     
    5959 
    6060    def load_property_name_info(self):
    61        (self.property_enum_name_list, self.full_name_map, self.property_lookup_map, self.property_kind_map) = parse_PropertyAlias_txt()
     61        (self.property_enum_name_list, self.full_name_map, self.property_lookup_map, self.property_kind_map) = parse_PropertyAlias_txt()
    6262
    6363    def load_property_value_info(self):
    64        (self.property_value_list, self.property_value_enum_integer, self.property_value_full_name_map, self.property_value_lookup_map, self.missing_specs) = parse_PropertyValueAlias_txt(self.property_lookup_map)
     64        (self.property_value_list, self.property_value_enum_integer, self.property_value_full_name_map, self.property_value_lookup_map, self.missing_specs) = parse_PropertyValueAlias_txt(self.property_lookup_map)
    6565
    6666    def generate_enumerated_property_template(self, filename_root, property_code):
    67        f = cformat.open_header_file_for_write(os.path.basename("ucd_precompiled_%s" % property_code))
    68        cformat.write_imports(f, ['"pablo/pablo_compiler.h"'])
    69        f.write(Unicode_pablo_template)
    70        pc = property_code.lower()
    71        vlist = self.property_value_list[property_code]
    72        for v in vlist:
    73           f.write(Unicode_property_fn_template.substitute({'property': pc, 'value': v}))
    74        install_stmt = "  p.InstallExternalFunction(\"__get_%s_%%s\", (void *) & __get_%s_%%s);\n" % (pc, pc)
    75        install_list = [install_stmt % (k, k) for k in vlist]
    76        f.write(install_fn % (pc, "".join(install_list)))
    77        cformat.close_header_file(f)
     67        f = cformat.open_header_file_for_write(os.path.basename("ucd_precompiled_%s" % property_code), 'generate_UCD_property_functions.py')
     68        cformat.write_imports(f, ['"pablo/pablo_compiler.h"'])
     69        f.write(Unicode_pablo_template)
     70        pc = property_code.lower()
     71        vlist = self.property_value_list[property_code]
     72        for v in vlist:
     73            f.write(Unicode_property_fn_template.substitute({'property': pc, 'value': v}))
     74        install_stmt = "  p.InstallExternalFunction(\"__get_%s_%%s\", (void *) & __get_%s_%%s);\n" % (pc, pc)
     75        install_list = [install_stmt % (k, k) for k in vlist]
     76        f.write(install_fn % (pc, "".join(install_list)))
     77        cformat.close_header_file(f)
    7878       
    7979    def generate_enumerated_property_file(self, filename_root, property_code):
    80        vlist = self.property_value_list[property_code]
    81        canon_map = self.property_value_lookup_map[property_code]
    82        (prop_values, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt')
    83        basename = os.path.basename(filename_root)
    84        f = open(basename + ".pablo", "wb")
    85        for k in vlist:
    86          parms = {'property' : property_code.lower(), 'value' : k}
    87          f.write(Unicode_CC_struct.substitute(parms))
    88          f.write(Unicode_CC_header.substitute(parms))
    89          template_var = Unicode_template_var.substitute(parms)
    90          f.write(generateCharClassDefs(defaultIfRangeList, {k: uset_to_range_list(value_map[k])},  template_var))
    91        f.write(Unicode_dummy_main)
    92        cformat.close_header_file(f)
     80        vlist = self.property_value_list[property_code]
     81        canon_map = self.property_value_lookup_map[property_code]
     82        (prop_values, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt')
     83        basename = os.path.basename(filename_root)
     84        f = open(basename + ".pablo", "wb")
     85        for k in vlist:
     86            parms = {'property' : property_code.lower(), 'value' : k}
     87            f.write(Unicode_CC_struct.substitute(parms))
     88            f.write(Unicode_CC_header.substitute(parms))
     89            template_var = Unicode_template_var.substitute(parms)
     90            f.write(generateCharClassDefs(defaultIfRangeList, {k: uset_to_range_list(value_map[k])},  template_var))
     91        f.write(Unicode_dummy_main)
     92        cformat.close_header_file(f)
    9393
    9494    def generate_ScriptExtensions_pablo(self):
    95        filename_root = 'ScriptExtensions'
    96        property_code = 'scx'
    97        vlist = self.property_value_list['sc']
    98        (prop_values, value_map) = parse_ScriptExtensions_txt(vlist, self.property_value_lookup_map['sc'])
    99        basename = os.path.basename(filename_root)
    100        f = open(basename + ".pablo", "wb")
    101        for k in vlist:
    102          parms = {'property' : 'scx', 'value' : k}
    103          f.write(Unicode_CC_struct.substitute(parms))
    104          f.write(Unicode_CC_header.substitute(parms))
    105          template_var = Unicode_template_var.substitute(parms)
    106          f.write(generateCharClassDefs(defaultIfRangeList, {k: uset_to_range_list(value_map[k])},  template_var))
    107        f.write(Unicode_dummy_main)
    108        cformat.close_header_file(f)
     95        filename_root = 'ScriptExtensions'
     96        property_code = 'scx'
     97        vlist = self.property_value_list['sc']
     98        (prop_values, value_map) = parse_ScriptExtensions_txt(vlist, self.property_value_lookup_map['sc'])
     99        basename = os.path.basename(filename_root)
     100        f = open(basename + ".pablo", "wb")
     101        for k in vlist:
     102            parms = {'property' : 'scx', 'value' : k}
     103            f.write(Unicode_CC_struct.substitute(parms))
     104            f.write(Unicode_CC_header.substitute(parms))
     105            template_var = Unicode_template_var.substitute(parms)
     106            f.write(generateCharClassDefs(defaultIfRangeList, {k: uset_to_range_list(value_map[k])},  template_var))
     107        f.write(Unicode_dummy_main)
     108        cformat.close_header_file(f)
    109109
     110    def generate_binary_property_template(self, filename_root):
     111        (props, prop_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', self.property_lookup_map)
     112        lprops = [p.lower() for p in props]
     113        f = cformat.open_header_file_for_write(os.path.basename("ucd_precompiled_%s" % filename_root.lower()), 'generate_UCD_property_functions.py')
     114        cformat.write_imports(f, ['"pablo/pablo_compiler.h"'])
     115        f.write(Unicode_pablo_template)
     116        for p in lprops:
     117            f.write(Unicode_property_fn_template.substitute({'property': p, 'value': 'Y'}))
     118        install_stmt = "  p.InstallExternalFunction(\"__get_%s_Y\", (void *) & __get_%s_Y);\n"
     119        install_list = [install_stmt % (p, p) for p in lprops]
     120        f.write(install_fn % (filename_root, "".join(install_list)))
     121        cformat.close_header_file(f)
     122       
     123    def generate_binary_properties_file(self, filename_root):
     124        (props, prop_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', self.property_lookup_map)
     125        basename = os.path.basename(filename_root)
     126        f = open(basename + ".pablo", "wb")
     127        for p in props:
     128            parms = {'property' : p.lower(), 'value' : 'Y'}
     129            f.write(Unicode_CC_struct.substitute(parms))
     130            f.write(Unicode_CC_header.substitute(parms))
     131            template_var = Unicode_template_var.substitute(parms)
     132            f.write(generateCharClassDefs(defaultIfRangeList, {'Y': uset_to_range_list(prop_map[p])},  template_var))
     133        f.write(Unicode_dummy_main)
     134        cformat.close_header_file(f)
    110135
    111136
    112137def UCD_main():
    113    ucd = UCD_generator()
     138    ucd = UCD_generator()
    114139
    115    # First parse all property names and their aliases
    116    ucd.load_property_name_info()
    117    #
    118    # Next parse all property value names and their aliases.  Generate the data.
    119    ucd.load_property_value_info()
    120    #
    121    # The Block property
    122    ucd.generate_enumerated_property_template('Blocks', 'blk')
    123    ucd.generate_enumerated_property_file('Blocks', 'blk')
    124    #
    125    # Scripts
    126    ucd.generate_enumerated_property_template('Scripts', 'sc')
    127    ucd.generate_enumerated_property_file('Scripts', 'sc')
    128    #
    129    # Script Extensions
    130    ucd.generate_enumerated_property_template('ScriptExtensions', 'scx')
    131    ucd.generate_ScriptExtensions_pablo()
    132    #
    133    # General Category
    134    ucd.generate_enumerated_property_template('GeneralCategory', 'gc')
    135    ucd.generate_enumerated_property_file('extracted/DerivedGeneralCategory', 'gc')
    136    #
     140    # First parse all property names and their aliases
     141    ucd.load_property_name_info()
     142    #
     143    # Next parse all property value names and their aliases.  Generate the data.
     144    ucd.load_property_value_info()
     145    #
     146    # The Block property
     147    ucd.generate_enumerated_property_template('Blocks', 'blk')
     148    ucd.generate_enumerated_property_file('Blocks', 'blk')
     149    #
     150    # Scripts
     151    ucd.generate_enumerated_property_template('Scripts', 'sc')
     152    ucd.generate_enumerated_property_file('Scripts', 'sc')
     153    #
     154    # Script Extensions
     155    ucd.generate_enumerated_property_template('ScriptExtensions', 'scx')
     156    ucd.generate_ScriptExtensions_pablo()
     157    #
     158    # General Category
     159    ucd.generate_enumerated_property_template('GeneralCategory', 'gc')
     160    ucd.generate_enumerated_property_file('extracted/DerivedGeneralCategory', 'gc')
     161    #
     162    # Core Properties
     163    ucd.generate_binary_property_template('DerivedCoreProperties')
     164    ucd.generate_binary_properties_file('DerivedCoreProperties')
     165    #ucd.generate_binary_properties_file('PropList')
    137166
    138167if __name__ == "__main__":
    139   set_UCD_dir('UCD/7.0.0')
    140   UCD_main()
     168    set_UCD_dir('UCD/7.0.0')
     169    UCD_main()
Note: See TracChangeset for help on using the changeset viewer.