Ignore:
Timestamp:
Oct 5, 2017, 10:45:20 AM (21 months ago)
Author:
cameron
Message:

PropertyObject restructuring - remove Miscellaneous and Codepoint objects, add Obsolete

Location:
icGREP/icgrep-devel/UCD-scripts
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/UCD-scripts/UCD_parser.py

    r5662 r5668  
    4040UCD_property_section_regexp = re.compile("^#\s*([-A-Za-z_0-9]+)\s*Properties\s*$")
    4141UCD_property_alias_regexp = re.compile("^([-A-Za-z_0-9]+)\s*;\s*([-A-Za-z_0-9]+)([^#]*)")
     42
     43# Section 2.3.3 of UAX #44
     44Obsolete_Properties = ["na1", "Gr_Link", "Hyphen", "isc", "XO_NFC", "XO_NFD", "XO_NFKC", "XO_NFKD" ,"FC_NFKC"]
    4245
    4346def parse_PropertyAlias_txt():
     
    5558        (property_code, prop_preferred_full_name, prop_extra) = (m.group(1), m.group(2), m.group(3))
    5659        property_enum_name_list.append(property_code)
    57         if property_kind == "Binary":
     60        if property_code in Obsolete_Properties:
     61            property_object_map[property_code] = ObsoletePropertyObject()
     62        elif property_kind == "Binary":
    5863            property_object_map[property_code] = BinaryPropertyObject()
    5964        elif property_kind == "Enumerated":
     
    263268            (prop_code, v) = parse_property_and_value(fields, property_lookup_map)
    264269            if not prop_code in props: props.append(prop_code)
    265             if v == None:  # binary property
    266                 property_object_map[prop_code].addDataRecord(cp_lo, cp_hi)
    267             else:
    268                 property_object_map[prop_code].addDataRecord(cp_lo, cp_hi, v)
     270            property_object_map[prop_code].addDataRecord(cp_lo, cp_hi, v)
    269271    for p in props:
    270272        property_object_map[p].finalizeProperty()
     
    291293            (cp_lo, cp_hi, fields) = parse_data_record(t)
    292294            if isinstance(property_object, BinaryPropertyObject) and len(fields) == 0:
    293                 property_object.addDataRecord(cp_lo, cp_hi)
     295                property_object.addDataRecord(cp_lo, cp_hi, None)
    294296            else:
    295297                property_object.addDataRecord(cp_lo, cp_hi, fields[0])
     
    353355        (ccc, bidic, decomp, bidim) = (m.group(4), m.group(5), m.group(6), m.group(10))
    354356        (decval, digitval, numval) = (m.group(7), m.group(8), m.group(9))
    355         # Unicode 1 name and ISO comment are obolete
    356         (uc, lc, tc) = (m.group(13), m.group(14), m.group(15))
     357        (na1, isc) = (m.group(10), m.group(11))
     358        (suc, slc, stc) = (m.group(13), m.group(14), m.group(15))
    357359        rangeMatch = NameRange_regexp.match(name)
    358360        if rangeMatch:
     
    362364            if rangeMatch.group(2) == 'Last':
    363365                if not rangeName in name_range_starts: raise Exception("UnicodeData range end encountered without prior range start: %s" % t)
    364                 range_records.append((name_range_starts[rangeName], cp, rangeName, gc, ccc, bidic, decomp, decval, digitval, numval, bidim, uc, lc, tc))
    365             continue
     366                range_records.append((name_range_starts[rangeName], cp, rangeName, gc))
    366367        if not NonName_regexp.match(name):
    367368            property_object_map['na'].addDataRecord(cp, cp, name)
     
    369370            (decomp_type, mapping) = parse_decomposition(decomp)
    370371            property_object_map['dm'].addDataRecord(cp, cp, mapping)
    371         if not uc == '':
    372             property_object_map['suc'].addDataRecord(cp, cp, uc)
    373             if tc == '':
     372        if not na1 == '':
     373            property_object_map['na1'].addDataRecord(cp, cp, na1)
     374        if not suc == '':
     375            property_object_map['suc'].addDataRecord(cp, cp, suc)
     376            if stc == '':
    374377                property_object_map['stc'].addDataRecord(cp, cp, uc)
    375         if not lc == '':
    376             property_object_map['slc'].addDataRecord(cp, cp, lc)
    377         if not tc == '':
    378             property_object_map['stc'].addDataRecord(cp, cp, tc)
     378        if not slc == '':
     379            property_object_map['slc'].addDataRecord(cp, cp, slc)
     380        if not stc == '':
     381            property_object_map['stc'].addDataRecord(cp, cp, stc)
    379382    property_object_map['na'].finalizeProperty()
     383    property_object_map['na1'].finalizeProperty()
     384    property_object_map['isc'].finalizeProperty()
    380385    property_object_map['dm'].finalizeProperty()
    381386    property_object_map['slc'].finalizeProperty()
  • icGREP/icgrep-devel/UCD-scripts/UCD_properties.py

    r5666 r5668  
    4040    }
    4141"""
    42 
    43 CodepointProperties = ['scf', 'slc', 'suc', 'stc']
    44 
    4542
    4643def emit_string_property(f, property_code, null_set, reflexive_set, cp_value_map):
     
    117114    f.write(cformat.multiline_fill(set_list, ',', 8))
    118115    f.write("\n         }};\n    }\n")
     116
     117def emit_Obsolete_property(f, property_code):
     118    s = string.Template(r"""    namespace ${prop_enum_up}_ns {
     119        static ObsoletePropertyObject property_object(${prop_enum});
     120    }
     121""")
     122    f.write(s.substitute(prop_enum = property_code, prop_enum_up = property_code.upper()))
     123
    119124
    120125class UCD_generator():
     
    198203        elif isinstance(property_object, StringPropertyObject):
    199204            emit_string_property(f, property_code, property_object.null_str_set, property_object.reflexive_set, property_object.cp_value_map)
     205        elif isinstance(property_object, ObsoletePropertyObject):
     206            emit_Obsolete_property(f, property_code)
    200207        else: return
    201208        self.supported_props.append(property_code)
     
    247254        f = cformat.open_header_file_for_write(basename)
    248255        cformat.write_imports(f, ['"PropertyAliases.h"', '"PropertyObjects.h"', '"PropertyValueAliases.h"', '"unicode_set.h"'])
    249         prop_code_list = ['na', 'dm', 'suc', 'slc', 'stc']
     256        prop_code_list = ['na', 'dm', 'suc', 'slc', 'stc', 'na1', 'isc']
    250257        f.write("\nnamespace UCD {\n")
    251258        for p in prop_code_list:
     
    298305            if p in self.supported_props:
    299306                objlist.append("&%s_ns::property_object" % p.upper())
    300             elif k == 'String':
    301                 if p in CodepointProperties:
    302                     objlist.append("new UnsupportedPropertyObject(%s, PropertyObject::ClassTypeId::CodepointProperty)" % p)
    303                 else:
    304                     objlist.append("new UnsupportedPropertyObject(%s, PropertyObject::ClassTypeId::StringProperty)" % p)
    305307            else:
    306308                objlist.append("new UnsupportedPropertyObject(%s, PropertyObject::ClassTypeId::%sProperty)" % (p, k))
  • icGREP/icgrep-devel/UCD-scripts/UCD_property_objects.py

    r5662 r5668  
    3232            raise Exception("Conflicting default specification")
    3333        self.default_value = default
     34    def addDataRecord(self, cp_lo, cp_hi, v):
     35        pass
    3436    def finalizeProperty(self):
    3537        pass
     
    134136        PropertyObject.setID(self, prop_code, long_name)
    135137
    136     def addDataRecord(self, cp_lo, cp_hi):
    137         self.value_map['Y'] = uset_union(self.value_map['Y'], range_uset(cp_lo, cp_hi))
     138    def addDataRecord(self, cp_lo, cp_hi, v):
     139        if v==None or v in self.property_value_lookup_map[v] == 'Y':
     140            self.value_map['Y'] = uset_union(self.value_map['Y'], range_uset(cp_lo, cp_hi))
     141        else:
     142            self.value_map['Y'] = uset_difference(self.value_map['Y'], range_uset(cp_lo, cp_hi))
    138143
    139144
     
    184189       
    185190    def getPropertyKind(self):
    186         if self.property_code in ['scf', 'slc', 'suc', 'stc']:
    187             return "Codepoint"
    188         else:
    189             return "String"
     191        return "String"
    190192
    191193    def addDataRecord(self, cp_lo, cp_hi, stringValue):
     
    215217            self.null_str_set = uset_union(self.null_str_set, uset_complement(uset_union(explicitly_defined_cps, self.reflexive_set)))
    216218
     219class ObsoletePropertyObject(PropertyObject):
     220    def __init__(self):
     221        PropertyObject.__init__(self)
     222
     223    def getPropertyKind(self): return "Obsolete"
     224
     225
    217226def getPropertyLookupMap(property_object_map):
    218227    property_lookup_map = {}
Note: See TracChangeset for help on using the changeset viewer.