Ignore:
Timestamp:
Oct 6, 2017, 11:36:55 AM (22 months ago)
Author:
cameron
Message:

StringOverride properties (simple case conversion vs. full case conversion)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/UCD-scripts/UCD_parser.py

    r5671 r5672  
    6767            property_object_map[property_code] = EnumeratedPropertyObject()
    6868        elif property_kind == "String":
    69             property_object_map[property_code] = StringPropertyObject()
     69            if property_code in ["uc", "lc", "tc", "cf"]:
     70                property_object_map[property_code] = StringOverridePropertyObject("s" + property_code)
     71            else:
     72                property_object_map[property_code] = StringPropertyObject()
    7073        elif property_kind == "Numeric":
    7174            property_object_map[property_code] = NumericPropertyObject()
     
    398401
    399402def parse_SpecialCasing_txt(property_object_map):
    400     data_records = []
    401403    f = open(UCD_config.UCD_src_dir + "/SpecialCasing.txt")
    402404    lines = f.readlines()
     
    418420    property_object_map['tc'].finalizeProperty()
    419421
     422
     423# CaseFolding.txt has four types of fold entries:
     424# S, C, F, T:  Simple, Common, Full and Turkic. 
     425# The SimpleCaseFold property is the set of mappings S+C,
     426# The FullCaseFold property is the set F+C
     427# There may be multiple entries per codepoint
     428
     429def parse_CaseFolding_txt():
     430    fold_map = {}
     431    f = open(UCD_config.UCD_src_dir + "/" + 'CaseFolding.txt')
     432    lines = f.readlines()
     433    for t in lines:
     434        if UCD_skip.match(t): continue  # skip comment and blank lines
     435        (cp, cp_hi, fields) = parse_data_record(t)
     436        (fold_type, fold_val) = (fields[0], fields[1])
     437        if not fold_type in fold_map: fold_map[fold_type] = {}
     438        if fold_type == 'S' or fold_type == 'C':
     439            # fold value is guaranteed to be a single codepoint
     440            fold_val = int(fold_val, 16)
     441        else:
     442            fold_val = [int(x, 16) for x in fold_val.split(" ")]
     443        fold_map[fold_type][cp] = fold_val
     444    return fold_map
     445
Note: See TracChangeset for help on using the changeset viewer.