Changeset 4368 for proto


Ignore:
Timestamp:
Dec 28, 2014, 12:39:12 PM (4 years ago)
Author:
cameron
Message:

Factor out parsing of ScriptExtensions.txt into UCD_parser

Location:
proto/charsetcompiler/UCD
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_parser.py

    r4367 r4368  
    210210    return (name_list_order, value_map)
    211211
     212def parse_ScriptExtensions_txt(canonical_property_value_map):
     213    filename_root = 'ScriptExtensions'
     214    property_code = 'scx'
     215    (scripts, script_map) = parse_UCD_codepoint_name_map('Scripts.txt', canonical_property_value_map)
     216    (scx_sets, scx_set_map) = parse_UCD_codepoint_name_map('ScriptExtensions.txt')
     217    value_map = {}
     218    explicitly_defined_set = empty_uset()
     219    for scx_list in scx_sets:
     220        scx_items = scx_list.split(" ")
     221        for scx in scx_items:
     222            #sc = canonical_property_value_map[canonicalize(scx)]
     223            sc = scx
     224            if value_map.has_key(sc):
     225               value_map[sc] = uset_union(value_map[sc], scx_set_map[scx_list])
     226            else: value_map[sc] = scx_set_map[scx_list]
     227        explicitly_defined_set = uset_union(explicitly_defined_set, scx_set_map[scx_list])
     228    for v in canonical_property_value_map.keys():
     229        if value_map.has_key(v):
     230            value_map[v] = uset_union(value_map[v], uset_difference(script_map[v], explicitly_defined_set))
     231        elif script_map.has_key(v):
     232            value_map[v] = script_map[v]
     233        else: value_map[v] = empty_uset()
     234    return (sorted(canonical_property_value_map.keys()), value_map)
     235
     236
    212237def parse_UCD_codepoint_name_map(mapfile, canonical_name_lookup_map = None):
    213238   value_map = {}
  • proto/charsetcompiler/UCD/UCD_properties.py

    r4367 r4368  
    142142       filename_root = 'ScriptExtensions'
    143143       property_code = 'scx'
    144        canonical_property_value_map = self.property_value_lookup_map['sc']
    145        (scripts, script_map) = parse_UCD_codepoint_name_map('Scripts.txt', canonical_property_value_map)
    146        (scx_sets, scx_set_map) = parse_UCD_codepoint_name_map('ScriptExtensions.txt')
    147        value_map = {}
     144       (prop_values, value_map) = parse_ScriptExtensions_txt(self.property_value_lookup_map['sc'])
    148145       basename = os.path.basename(filename_root)
    149146       f = cformat.open_header_file_for_write(basename)
    150147       cformat.write_imports(f, ["<vector>", '"PropertyAliases.h"', '"PropertyValueAliases.h"', '"unicode_set.h"'])
    151148       f.write("\nusing namespace UCD;\n\n")
    152        explicitly_defined_set = empty_uset()
    153        for scx_list in scx_sets:
    154          scx_items = scx_list.split(" ")
    155          for scx in scx_items:
    156             #sc = canonical_property_value_map[canonicalize(scx)]
    157             sc = scx
    158             if value_map.has_key(sc):
    159                value_map[sc] = uset_union(value_map[sc], scx_set_map[scx_list])
    160             else: value_map[sc] = scx_set_map[scx_list]
    161          explicitly_defined_set = uset_union(explicitly_defined_set, scx_set_map[scx_list])
    162149       for v in self.property_value_list['sc']:
    163           if value_map.has_key(v):
    164             value_map[v] = uset_union(value_map[v], uset_difference(script_map[v], explicitly_defined_set))
    165           elif script_map.has_key(v):
    166             value_map[v] = script_map[v]
    167           else: value_map[v] = empty_uset()
    168        for v in self.property_value_list['sc']:
    169          #v = canonical_property_value_map[canonicalize(v)]
    170150         f.write("    const UnicodeSet %s_Ext \n" % v.lower())
    171151         f.write(value_map[v].showC(6) + ";\n")
Note: See TracChangeset for help on using the changeset viewer.