Ignore:
Timestamp:
Nov 28, 2017, 1:48:14 AM (20 months ago)
Author:
nmedfort
Message:

Updated UCD Python scripts

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/UCD-scripts/UCD_properties.py

    r5686 r5749  
    1010#
    1111#
    12 import re, string, os.path, cformat, UCD_config
    13 from unicode_set import *
     12import string, os.path
    1413from UCD_parser import *
    1514from UCD_property_objects import *
     
    4544        /** Code Point Ranges for ${prop_enum} mapping to <none>
    4645        ${null_set_ranges}**/
    47 
    48         const UnicodeSet null_codepoint_set
    49         ${null_set_value};
     46       
     47        ${null_set_value}
    5048
    5149        /** Code Point Ranges for ${prop_enum} mapping to <codepoint>
    5250        ${reflexive_set_ranges}**/
    53         const UnicodeSet reflexive_set
    54         ${reflexive_set_value};
     51       
     52        ${reflexive_set_value}
    5553
    5654        const unsigned buffer_length = ${buffer_length};
    57         const static char __attribute__ ((aligned (32))) string_buffer[${allocation_length}] = u8R"__(${string_buffer})__";
    58 
    59         const static std::vector<codepoint_t> defined_cps = {
     55        const static char string_buffer[${allocation_length}] LLVM_ALIGNAS(32) = u8R"__(${string_buffer})__";
     56
     57        const static std::vector<codepoint_t> defined_cps{
    6058        ${explicitly_defined_cps}};
    6159        static StringPropertyObject property_object(${prop_enum},
    62                                                     null_codepoint_set,
    63                                                     reflexive_set,
     60                                                    std::move(null_codepoint_set),
     61                                                    std::move(reflexive_set),
    6462                                                    static_cast<const char *>(string_buffer),
    6563                                                    buffer_length,
    66                                                     defined_cps);
     64                                                    std::move(defined_cps));
    6765    }
    6866""")
     
    7371    buffer_length = len(string_buffer.encode("utf-8"))
    7472    f.write(s.substitute(prop_enum = property_code,
    75     prop_enum_up = property_code.upper(),
    76     string_buffer = string_buffer,
    77     buffer_length = buffer_length,
    78     allocation_length = (buffer_length + 255) & -256,
    79     null_set_ranges = cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(null_set)], ',', 8),
    80     null_set_value = null_set.showC(12),
    81     reflexive_set_ranges = cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(reflexive_set)], ',', 8),
    82     reflexive_set_value = reflexive_set.showC(12),
    83     explicitly_defined_cp_count = len(cps),
    84     explicitly_defined_cps = cformat.multiline_fill(['0x%04x' % cp for cp in cps], ',', 8)
    85     ))
     73                         prop_enum_up = property_code.upper(),
     74                         string_buffer = string_buffer,
     75                         buffer_length = buffer_length,
     76                         allocation_length = (buffer_length + 255) & -256,
     77                         null_set_ranges = cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(null_set)], ',', 8),
     78                         null_set_value = null_set.generate("null_codepoint_set", 8),
     79                         reflexive_set_ranges = cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(reflexive_set)], ',', 8),
     80                         reflexive_set_value = reflexive_set.generate("reflexive_set", 8),
     81                         explicitly_defined_cp_count = len(cps),
     82                         explicitly_defined_cps = cformat.multiline_fill(['0x%04x' % cp for cp in cps], ',', 8)
     83                         ))
    8684
    8785def emit_string_override_property(f, property_code, overridden_code, override_set, cp_value_map):
     
    9088        ${overridden_set_ranges}**/
    9189
    92         const UnicodeSet explicitly_defined_set
    93         ${overridden_set_value};
     90        ${overridden_set_value}
    9491
    9592        const unsigned buffer_length = ${buffer_length};
    96         const static char __attribute__ ((aligned (32))) string_buffer[${allocation_length}] = u8R"__(${string_buffer})__";
    97 
    98         const static std::vector<codepoint_t> defined_cps = {
     93        const static char string_buffer[${allocation_length}] LLVM_ALIGNAS(32) = u8R"__(${string_buffer})__";
     94
     95        const static std::vector<codepoint_t> defined_cps{
    9996        ${explicitly_defined_cps}};
    10097        static StringOverridePropertyObject property_object(${prop_enum},
    10198                                                    ${overridden}_ns::property_object,
    102                                                     explicitly_defined_set,
     99                                                    std::move(explicitly_defined_set),
    103100                                                    static_cast<const char *>(string_buffer),
    104101                                                    buffer_length,
    105                                                     defined_cps);
     102                                                    std::move(defined_cps));
    106103    }
    107104""")
     
    112109    buffer_length = len(string_buffer.encode("utf-8"))
    113110    f.write(s.substitute(prop_enum = property_code,
    114     prop_enum_up = property_code.upper(),
    115     overridden = overridden_code.upper(),
    116     string_buffer = string_buffer,
    117     buffer_length = buffer_length,
    118     allocation_length = (buffer_length + 255) & -256,
    119     overridden_set_ranges = cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(override_set)], ',', 8),
    120     overridden_set_value = override_set.showC(12),
    121     explicitly_defined_cp_count = len(cps),
    122     explicitly_defined_cps = cformat.multiline_fill(['0x%04x' % cp for cp in cps], ',', 8)
    123     ))
     111                         prop_enum_up = property_code.upper(),
     112                         overridden = overridden_code.upper(),
     113                         string_buffer = string_buffer,
     114                         buffer_length = buffer_length,
     115                         allocation_length = (buffer_length + 255) & -256,
     116                         overridden_set_ranges = cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(override_set)], ',', 8),
     117                         overridden_set_value = override_set.generate("explicitly_defined_set", 8),
     118                         explicitly_defined_cp_count = len(cps),
     119                         explicitly_defined_cps = cformat.multiline_fill(['0x%04x' % cp for cp in cps], ',', 8)
     120                         ))
    124121
    125122def emit_numeric_property(f, property_code, NaN_set, cp_value_map):
     
    128125        ${NaN_set_ranges}**/
    129126
    130         const UnicodeSet NaN_set
    131         ${NaN_set_value};
    132 
    133        const unsigned buffer_length = ${buffer_length};
    134         const static char __attribute__ ((aligned (32))) string_buffer[${allocation_length}] = u8R"__(${string_buffer})__";
     127        ${NaN_set_value}
     128
     129        const unsigned buffer_length = ${buffer_length};
     130        const static char string_buffer[${allocation_length}] LLVM_ALIGNAS(32) = u8R"__(${string_buffer})__";
    135131
    136132        const static std::vector<codepoint_t> defined_cps = {
    137133        ${explicitly_defined_cps}};
    138134        static NumericPropertyObject property_object(${prop_enum},
    139                                                     NaN_set,
     135                                                    std::move(NaN_set),
    140136                                                    static_cast<const char *>(string_buffer),
    141137                                                    buffer_length,
    142                                                     defined_cps);
     138                                                    std::move(defined_cps));
    143139    }
    144140""")
     
    150146    buffer_length = len(string_buffer.encode("utf-8"))
    151147    f.write(s.substitute(prop_enum = property_code,
    152     prop_enum_up = property_code.upper(),
    153     string_buffer = string_buffer,
    154     buffer_length = buffer_length,
    155     allocation_length = (buffer_length + 255) & -256,
    156     NaN_set_ranges = cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(NaN_set)], ',', 8),
    157     NaN_set_value = NaN_set.showC(12),
    158     explicitly_defined_cp_count = len(cps),
    159     explicitly_defined_cps = cformat.multiline_fill(['0x%04x' % cp for cp in cps], ',', 8)
    160     ))
     148                         prop_enum_up = property_code.upper(),
     149                         string_buffer = string_buffer,
     150                         buffer_length = buffer_length,
     151                         allocation_length = (buffer_length + 255) & -256,
     152                         NaN_set_ranges = cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(NaN_set)], ',', 8),
     153                         NaN_set_value = NaN_set.generate("NaN_set", 8),
     154                         explicitly_defined_cp_count = len(cps),
     155                         explicitly_defined_cps = cformat.multiline_fill(['0x%04x' % cp for cp in cps], ',', 8)
     156                         ))
    161157
    162158
     
    165161    f.write("        /** Code Point Ranges for %s\n        " % property_code)
    166162    f.write(cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(property_set)], ',', 8))
    167     f.write("**/\n")
    168     f.write("        const UnicodeSet codepoint_set \n")
    169     f.write(property_set.showC(12) + ";\n")
    170     f.write("        static BinaryPropertyObject property_object{%s, codepoint_set};\n    }\n" % property_code)
     163    f.write("**/\n\n")
     164    f.write(property_set.generate("codepoint_set", 8))
     165    f.write("        static BinaryPropertyObject property_object{%s, std::move(codepoint_set)};\n    }\n" % property_code)
    171166
    172167def emit_enumerated_property(f, property_code, independent_prop_values, prop_values, value_map):
     
    176171        f.write("    /** Code Point Ranges for %s\n    " % v)
    177172        f.write(cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(value_map[v])], ',', 4))
    178         f.write("**/\n")
    179         f.write("    const UnicodeSet %s_Set \n" % v.lower())
    180         f.write(value_map[v].showC(8) + ";\n")
     173        f.write("**/\n\n")
     174        f.write(value_map[v].generate(v.lower() + "_Set", 4))
    181175    set_list = ['&%s_Set' % v.lower() for v in prop_values]
    182176    f.write("    static EnumeratedPropertyObject property_object\n")
    183177    f.write("        {%s,\n" % property_code)
    184     f.write("         %s_ns::independent_prop_values,\n" % property_code.upper())
    185     f.write("         %s_ns::enum_names,\n" % property_code.upper())
    186     f.write("         %s_ns::value_names,\n" % property_code.upper())
    187     f.write("         %s_ns::aliases_only_map,\n" % property_code.upper())
    188     f.write("         {")
    189     f.write(cformat.multiline_fill(set_list, ',', 8))
    190     f.write("\n         }};\n    }\n")
     178    f.write("        %s_ns::independent_prop_values,\n" % property_code.upper())
     179    f.write("        std::move(%s_ns::enum_names),\n" % property_code.upper())
     180    f.write("        std::move(%s_ns::value_names),\n" % property_code.upper())
     181    f.write("        std::move(%s_ns::aliases_only_map),{\n" % property_code.upper())
     182    f.write("        " + cformat.multiline_fill(set_list, ',', 8))
     183    f.write("\n        }};"
     184            "\n    }\n")
    191185
    192186def emit_Obsolete_property(f, property_code):
     
    294288
    295289foldDeclarations = r"""
    296 typedef unsigned codepoint_t;
    297 
    298290struct FoldEntry {
    299     re::codepoint_t range_lo;
    300     int fold_offset;
    301     std::vector<re::interval_t> fold_pairs;
     291    const UCD::codepoint_t range_lo;
     292    const int fold_offset;
     293    const std::vector<UCD::interval_t> fold_pairs;
    302294};
    303295
    304 
    305 void caseInsensitiveInsertRange(re::CC * cc, const re::codepoint_t lo, const re::codepoint_t hi);
    306 
    307 inline void caseInsensitiveInsert(re::CC * cc, const re::codepoint_t cp) {
     296void caseInsensitiveInsertRange(UCD::UnicodeSet * const cc, const UCD::codepoint_t lo, const UCD::codepoint_t hi);
     297
     298inline void caseInsensitiveInsert(UCD::UnicodeSet * const cc, const UCD::codepoint_t cp) {
    308299    caseInsensitiveInsertRange(cc, cp, cp);
    309300}
     
    485476            f.write(cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(value_map[v])], ',', 8))
    486477            f.write("**/\n")
    487             f.write("        const UnicodeSet %s_Ext \n" % v.lower())
    488             f.write(value_map[v].showC(12) + ";\n")
     478            f.write(value_map[v].generate(v.lower() + "_Ext", 8))
    489479        set_list = ['&%s_Ext' % v.lower() for v in prop_list]
    490480        f.write("        static ExtensionPropertyObject property_object\n")
     
    518508        setVersionfromReadMe_txt()
    519509        f = cformat.open_header_file_for_write('UCD_Config')
    520         f.write("\nnamespace UCD {\n")
    521         f.write("   const std::string UnicodeVersion = \"%s\";\n" % UCD_config.version)
     510        f.write("#include <utility>\n")
     511        f.write("namespace UCD {\n")
     512        f.write("\tconst auto UnicodeVersion = \"%s\";\n" % UCD_config.version)
     513        f.write("\tusing codepoint_t = unsigned;\n")
     514        f.write("\tenum : codepoint_t { UNICODE_MAX = %s };\n" % UCD_config.UCD_max_code_point)
     515        f.write("\tusing interval_t = std::pair<codepoint_t, codepoint_t>;\n")
    522516        f.write("}\n")
    523517        cformat.close_header_file(f)
     
    529523        cm = simple_CaseClosure_map(fold_data)
    530524        f = cformat.open_header_file_for_write(basename, 'casefold.py')
    531         cformat.write_imports(f, ['"PropertyAliases.h"', '"PropertyObjects.h"', '"PropertyValueAliases.h"', '"unicode_set.h"', "<vector>", '"re/re_cc.h"'])
     525        cformat.write_imports(f, ['"PropertyAliases.h"', '"PropertyObjects.h"', '"PropertyValueAliases.h"', '"unicode_set.h"', '<vector>'])
    532526        f.write(foldDeclarations)
    533527        f.write(genFoldEntryData(cm))
Note: See TracChangeset for help on using the changeset viewer.