Changeset 5153 for proto/charsetcompiler


Ignore:
Timestamp:
Sep 12, 2016, 12:58:31 PM (3 years ago)
Author:
cameron
Message:

Generate property value enumerations with the default value listed first, so that it is assigned enum code 0.

Location:
proto/charsetcompiler/UCD
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_parser.py

    r5143 r5153  
    163163def parse_UCD_enumerated_property_map(property_code, vlist, canon_map, mapfile, default_value = None):
    164164    value_map = {}
    165     name_list_order = []
     165    for v in vlist: value_map[v] = empty_uset()
     166    if default_value == None:
     167        name_list_order = []
     168    else:
     169        # Default value must always be first in the final enumeration order.
     170        name_list_order = [default_value]
    166171    f = open(UCD_config.UCD_src_dir + "/" + mapfile)
    167172    lines = f.readlines()
     
    177182                if missing_lo != 0 or missing_hi != 0x10FFFF: raise Exception("Unexpected missing data range '%x, %x'" % (missing_lo, missing_hi))
    178183                default_value = canon_map[default_value]
     184                #print "Property %s: setting default_value  %s" % (property_code, default_value)
     185                # Default value must always be first in the final enumeration order.
     186                if default_value in name_list_order: name_list_order.remove(default_value)
     187                name_list_order = [default_value] + name_list_order
    179188            continue  # skip comment and blank lines
    180189        m = UCD_point_name_regexp.match(t)
     
    190199        if not canon_map.has_key(cname):  raise Exception("Unknown property or property value name '%s'" % cname)
    191200        name = canon_map[cname]
    192         if not value_map.has_key(name):
    193             value_map[name] = newset
     201        if not name in name_list_order:
    194202            name_list_order.append(name)
    195         else: value_map[name] = uset_union(value_map[name], newset)
     203        value_map[name] = uset_union(value_map[name], newset)
    196204    if property_code == 'gc':
    197205        # special logic for derived categories
     
    204212        value_map['Z'] = union_of_all([value_map[v] for v in ['Zs', 'Zl', 'Zp']])
    205213        value_map['C'] = union_of_all([value_map[v] for v in ['Cc', 'Cf', 'Cs', 'Co', 'Cn']])
    206         name_list_order = ['LC', 'L', 'M', 'N', 'P', 'S', 'Z', 'C']+ name_list_order
    207     for v in vlist:
    208         if not v in name_list_order:
    209             #raise Exception("Property %s value %s missing" % (self.full_name_map[property_code], v))
    210             #print("Warning: property %s has no instance of value %s" % (property_code, v))
    211             value_map[v] = empty_uset()
    212             name_list_order.append(v)
     214        name_list_order += ['LC', 'L', 'M', 'N', 'P', 'S', 'Z', 'C']
    213215    explicitly_defined_cps = empty_uset()
    214216    for k in value_map.keys(): explicitly_defined_cps = uset_union(explicitly_defined_cps, value_map[k])
    215217    need_default_value = uset_complement(explicitly_defined_cps)
    216218    if default_value != None:
    217         if value_map.has_key(default_value):
    218             value_map[default_value] = uset_union(value_map[default_value], need_default_value)
    219         else:
    220             value_map[default_value] = need_default_value
    221             name_list_order.append(default_value)
     219        value_map[default_value] = uset_union(value_map[default_value], need_default_value)
    222220    elif uset_popcount(need_default_value) > 0:
    223221        print "Warning no default value, but %i codepoints not specified" % uset_popcount(need_default_value)
     222    for v in vlist:
     223        if not v in name_list_order:
     224            #raise Exception("Property %s value %s missing" % (self.full_name_map[property_code], v))
     225            print("Warning: property %s has no instance of value %s" % (property_code, v))
     226            name_list_order.append(v)
    224227    return (name_list_order, value_map)
    225228
  • proto/charsetcompiler/UCD/UCD_properties.py

    r5143 r5153  
    100100
    101101 
    102     def generate_property_value_file(self, filename_root, property_code, default_value = None):
     102    def generate_property_value_file(self, filename_root, property_code):
     103        canon_map = self.property_value_lookup_map[property_code]
     104        if self.missing_specs.has_key(property_code):
     105            default_value = canon_map[canonicalize(self.missing_specs[property_code])]
     106        else: default_value = None
    103107        vlist = self.property_value_list[property_code]
    104108        canon_map = self.property_value_lookup_map[property_code]
    105109        (prop_values, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt', default_value)
     110        #
     111        self.property_value_list[property_code] = prop_values
    106112        basename = os.path.basename(filename_root)
    107113        f = cformat.open_header_file_for_write(os.path.basename(filename_root))
     
    109115        f.write("\nnamespace UCD {\n")
    110116        f.write("    namespace %s_ns {\n" % property_code.upper())
    111         for v in self.property_value_list[property_code]:
     117        for v in prop_values:
    112118            f.write("    /** Code Point Ranges for %s\n    " % v)
    113119            f.write(cformat.multiline_fill(['[%s, %s]' % (lo, hi) for (lo, hi) in uset_to_range_list(value_map[v])], ',', 4))
     
    116122            f.write(value_map[v].showC(8) + ";\n")
    117123        print "%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()]))
    118         set_list = ['&%s_Set' % v.lower() for v in self.property_value_list[property_code]]
     124        set_list = ['&%s_Set' % v.lower() for v in prop_values]
    119125        f.write("    static EnumeratedPropertyObject property_object\n")
    120126        f.write("        {%s,\n" % property_code)
     
    214220    # Next parse all property value names and their aliases.  Generate the data.
    215221    ucd.load_property_value_info()
    216     ucd.generate_PropertyValueAliases_h()
    217222    #
    218223    # The Age property
     
    281286    # ucd.generate_property_value_file('Jamo', 'jsn')
    282287    #
    283     # Stubs
    284     #
     288    #
     289    #
     290    ucd.generate_PropertyValueAliases_h()
     291
    285292    ucd.generate_PropertyObjectTable_h()
    286293
  • proto/charsetcompiler/UCD/unicode_set.py

    r4632 r5153  
    3030UnicodeQuadCount = 0x110000 / quad_bits #  2**log2_quad_bits codepoints per quad
    3131FullQuadMask = (1<<(quad_bits)) - 1
    32 run_bytes = 2
     32run_bytes = 4
    3333
    3434
Note: See TracChangeset for help on using the changeset viewer.