Changeset 5155


Ignore:
Timestamp:
Sep 14, 2016, 7:57:00 AM (2 years ago)
Author:
cameron
Message:

Enumeration parsing now returns only the property values explicitly found in the data file; also add a count of these independent enumeration values.

Location:
proto/charsetcompiler/UCD
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_parser.py

    r5153 r5155  
    161161UCD_range_name_regexp = re.compile("^([0-9A-F]{4,6})[.][.]([0-9A-F]{4,6})\s*;\s*((?:[-A-Za-z0-9_.]+\s+)*[-A-Za-z0-9_.]+)\s*(?:[;#]|$)")
    162162
     163#
     164# Parse a file defining the enumerated property values for a given enumerated property,
     165# returning the list of independent property values found, as well as the value map.
     166# Ensure that the default value for the property is first in the list of property values,
     167# and that all codepoints not explicitly identified in the file are mapped to this default.
    163168def parse_UCD_enumerated_property_map(property_code, vlist, canon_map, mapfile, default_value = None):
    164169    value_map = {}
     
    202207            name_list_order.append(name)
    203208        value_map[name] = uset_union(value_map[name], newset)
    204     if property_code == 'gc':
    205         # special logic for derived categories
    206         value_map['LC'] = union_of_all([value_map[v] for v in ['Lu', 'Ll', 'Lt']])
    207         value_map['L'] = union_of_all([value_map[v] for v in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo']])
    208         value_map['M'] = union_of_all([value_map[v] for v in ['Mn', 'Mc', 'Me']])
    209         value_map['N'] = union_of_all([value_map[v] for v in ['Nd', 'Nl', 'No']])
    210         value_map['P'] = union_of_all([value_map[v] for v in ['Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po']])
    211         value_map['S'] = union_of_all([value_map[v] for v in ['Sm', 'Sc', 'Sk', 'So']])
    212         value_map['Z'] = union_of_all([value_map[v] for v in ['Zs', 'Zl', 'Zp']])
    213         value_map['C'] = union_of_all([value_map[v] for v in ['Cc', 'Cf', 'Cs', 'Co', 'Cn']])
    214         name_list_order += ['LC', 'L', 'M', 'N', 'P', 'S', 'Z', 'C']
    215209    explicitly_defined_cps = empty_uset()
    216210    for k in value_map.keys(): explicitly_defined_cps = uset_union(explicitly_defined_cps, value_map[k])
     
    220214    elif uset_popcount(need_default_value) > 0:
    221215        print "Warning no default value, but %i codepoints not specified" % uset_popcount(need_default_value)
    222     for v in vlist:
    223         if not v in name_list_order:
    224             #raise Exception("Property %s value %s missing" % (self.full_name_map[property_code], v))
    225             print("Warning: property %s has no instance of value %s" % (property_code, v))
    226             name_list_order.append(v)
    227216    return (name_list_order, value_map)
    228217
  • proto/charsetcompiler/UCD/UCD_properties.py

    r5153 r5155  
    108108        canon_map = self.property_value_lookup_map[property_code]
    109109        (prop_values, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt', default_value)
     110        independent_prop_values = len(prop_values)
     111        for v in vlist:
     112            if not v in prop_values:
     113                #raise Exception("Property %s value %s missing" % (self.full_name_map[property_code], v))
     114                print("Warning: property %s has no instance of value %s" % (property_code, v))
     115                prop_values.append(v)
    110116        #
    111117        self.property_value_list[property_code] = prop_values
     
    114120        cformat.write_imports(f, ['"PropertyObjects.h"', '"PropertyValueAliases.h"', '"unicode_set.h"'])
    115121        f.write("\nnamespace UCD {\n")
    116         f.write("    namespace %s_ns {\n" % property_code.upper())
     122        f.write("  namespace %s_ns {\n" % property_code.upper())
     123        f.write("    const unsigned independent_prop_values = %s;\n" % independent_prop_values)
     124        if property_code == 'gc':
     125            # special logic for derived categories
     126            value_map['LC'] = union_of_all([value_map[v] for v in ['Lu', 'Ll', 'Lt']])
     127            value_map['L'] = union_of_all([value_map[v] for v in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo']])
     128            value_map['M'] = union_of_all([value_map[v] for v in ['Mn', 'Mc', 'Me']])
     129            value_map['N'] = union_of_all([value_map[v] for v in ['Nd', 'Nl', 'No']])
     130            value_map['P'] = union_of_all([value_map[v] for v in ['Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po']])
     131            value_map['S'] = union_of_all([value_map[v] for v in ['Sm', 'Sc', 'Sk', 'So']])
     132            value_map['Z'] = union_of_all([value_map[v] for v in ['Zs', 'Zl', 'Zp']])
     133            value_map['C'] = union_of_all([value_map[v] for v in ['Cc', 'Cf', 'Cs', 'Co', 'Cn']])
    117134        for v in prop_values:
    118135            f.write("    /** Code Point Ranges for %s\n    " % v)
     
    125142        f.write("    static EnumeratedPropertyObject property_object\n")
    126143        f.write("        {%s,\n" % property_code)
     144        f.write("         %s_ns::independent_prop_values,\n" % property_code.upper())
    127145        f.write("         %s_ns::enum_names,\n" % property_code.upper())
    128146        f.write("         %s_ns::value_names,\n" % property_code.upper())
Note: See TracChangeset for help on using the changeset viewer.