Ignore:
Timestamp:
Dec 28, 2014, 12:39:12 PM (5 years ago)
Author:
cameron
Message:

Factor out parsing of ScriptExtensions.txt into UCD_parser

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_parser.py

    r4367 r4368  
    210210    return (name_list_order, value_map)
    211211
     212def parse_ScriptExtensions_txt(canonical_property_value_map):
     213    filename_root = 'ScriptExtensions'
     214    property_code = 'scx'
     215    (scripts, script_map) = parse_UCD_codepoint_name_map('Scripts.txt', canonical_property_value_map)
     216    (scx_sets, scx_set_map) = parse_UCD_codepoint_name_map('ScriptExtensions.txt')
     217    value_map = {}
     218    explicitly_defined_set = empty_uset()
     219    for scx_list in scx_sets:
     220        scx_items = scx_list.split(" ")
     221        for scx in scx_items:
     222            #sc = canonical_property_value_map[canonicalize(scx)]
     223            sc = scx
     224            if value_map.has_key(sc):
     225               value_map[sc] = uset_union(value_map[sc], scx_set_map[scx_list])
     226            else: value_map[sc] = scx_set_map[scx_list]
     227        explicitly_defined_set = uset_union(explicitly_defined_set, scx_set_map[scx_list])
     228    for v in canonical_property_value_map.keys():
     229        if value_map.has_key(v):
     230            value_map[v] = uset_union(value_map[v], uset_difference(script_map[v], explicitly_defined_set))
     231        elif script_map.has_key(v):
     232            value_map[v] = script_map[v]
     233        else: value_map[v] = empty_uset()
     234    return (sorted(canonical_property_value_map.keys()), value_map)
     235
     236
    212237def parse_UCD_codepoint_name_map(mapfile, canonical_name_lookup_map = None):
    213238   value_map = {}
Note: See TracChangeset for help on using the changeset viewer.