Changeset 4192 for proto/charsetcompiler


Ignore:
Timestamp:
Sep 24, 2014, 7:20:14 PM (5 years ago)
Author:
cameron
Message:

Built-in support for gc categories LC/L/M/N/P/S/Z/C

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_properties.py

    r4191 r4192  
    4848"""
    4949
    50 
    51 
    52 
    5350PropertyValueAliases_template = r"""
    5451namespace UCD {
     
    7673"""
    7774
    78 PropertyValues_template = r"""
    79 using std::vector;
    80 
    81 namespace UCD {
    82   vector<UnicodeSet> value_sets[] = {
    83 %s
    84   };
    85 }
    86 """
    87 
    88 
    89 
    90 
    91 
    92 def generate_PropertyValueSets_h(property_enum_name_list, property_value_list, property_value_enum_integer, property_value_full_name_map, property_value_lookup_map):
    93    f = cformat.open_header_file_for_write('PropertyValueSets')
    94    cformat.write_imports(f, ["<vector>", '"unicode_set.h"'])
    95    vec_decl_list = []
    96    for p in property_enum_name_list:
    97      if not property_value_list.has_key(p):
    98        vec_decl_list.append("vector<UnicodeSet>(0)")
    99      elif property_value_list[p] == ['N', 'Y']:
    100        vec_decl_list.append("vector<UnicodeSet>(1)")
    101      elif p == 'scx':
    102        vec_decl_list.append("vector<UnicodeSet>(%i)" % len(property_value_list['sc']))
    103      else:
    104        vec_decl_list.append("vector<UnicodeSet>(%i)" % len(property_value_list[p]))
    105    f.write(PropertyValues_template % (cformat.multiline_fill(vec_decl_list, ',', 6)))
    106    cformat.close_header_file(f)
    107 
     75#
     76#  Union of a list of sets
     77#
     78def union_of_all(uset_list):
     79   if uset_list == []: return empty_uset()
     80   else:
     81     accum_set = uset_list[0]
     82     for s in uset_list[1:]:
     83        accum_set = uset_union(accum_set, s)
     84     return accum_set
    10885
    10986#
     
    141118
    142119   
    143 def generate_binary_property_stubs(props):
    144    f = cformat.open_header_file_for_write('PropertyValueStubs')
    145    cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"'])
    146    f.write("\nnamespace UCD {\n")
    147    for p in sorted(props):
    148      f.write("  namespace %s {\n    const UnsupportedPropertyObject property_object{%s, BinaryProperty};\n  }\n" % (p.upper(), p))
    149    f.write("}\n\n")
    150    cformat.close_header_file(f)
    151      
    152120CodepointProperties = ['scf', 'slc', 'suc', 'stc']
    153121
     
    330298           cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"', '"PropertyValueAliases.h"'])
    331299           f.write("\nnamespace UCD {\n")
    332            print "%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()]))
    333300           f.write("  namespace %s {\n" % property_code.upper())
    334            all_explicit_values = empty_uset()
    335            for v in self.property_value_list[property_code]:
    336              all_explicit_values = uset_union(all_explicit_values, value_map[v])
     301           all_explicit_values = union_of_all([value_map[v] for v in self.property_value_list[property_code]])
    337302           missing_values = uset_complement(all_explicit_values)
    338303           if self.missing_specs.has_key(property_code):
     
    341306             default_key = self.property_value_lookup_map[property_code][default]
    342307             value_map[default_key] = uset_union(value_map[default_key], missing_values) 
     308#
     309#
     310           if property_code == 'gc':
     311             value_map['LC'] = union_of_all([value_map[v] for v in ['Lu', 'Ll', 'Lt']])
     312             value_map['L'] = union_of_all([value_map[v] for v in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo']])
     313             value_map['M'] = union_of_all([value_map[v] for v in ['Mn', 'Mc', 'Me']])
     314             value_map['N'] = union_of_all([value_map[v] for v in ['Nd', 'Nl', 'No']])
     315             value_map['P'] = union_of_all([value_map[v] for v in ['Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po']])
     316             value_map['S'] = union_of_all([value_map[v] for v in ['Sm', 'Sc', 'Sk', 'So']])
     317             value_map['Z'] = union_of_all([value_map[v] for v in ['Zs', 'Zl', 'Zp']])
     318             value_map['C'] = union_of_all([value_map[v] for v in ['Cc', 'Cf', 'Cs', 'Co', 'Cn']])
    343319           for v in self.property_value_list[property_code]:
    344320             f.write("    const UnicodeSet %s_Set \n" % v.lower())
    345321             f.write(value_map[v].showC(6) + ";\n")
     322           print "%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()]))
    346323           if not self.missing_specs.has_key(property_code):
    347324             f.write("    const UnicodeSet Missing_Set \n")
Note: See TracChangeset for help on using the changeset viewer.