Ignore:
Timestamp:
Oct 5, 2017, 1:21:22 PM (2 years ago)
Author:
cameron
Message:

Numeric Property Support and missing SpecialCasing.h file

Location:
icGREP/icgrep-devel/UCD-scripts
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/UCD-scripts/UCD_parser.py

    r5669 r5670  
    380380        if not stc == '':
    381381            property_object_map['stc'].addDataRecord(cp, cp, stc)
     382        if not decval == '':
     383            property_object_map['nv'].addDataRecord(cp, cp, decval)
     384        if not digitval == '':
     385            property_object_map['nv'].addDataRecord(cp, cp, digitval)
     386        if not numval == '':
     387            property_object_map['nv'].addDataRecord(cp, cp, numval)
     388
    382389    property_object_map['na'].finalizeProperty()
    383390    property_object_map['na1'].finalizeProperty()
     
    387394    property_object_map['suc'].finalizeProperty()
    388395    property_object_map['stc'].finalizeProperty()
     396    property_object_map['nv'].finalizeProperty()
    389397
    390398def parse_SpecialCasing_txt(property_object_map):
  • icGREP/icgrep-devel/UCD-scripts/UCD_properties.py

    r5669 r5670  
    8181    reflexive_set_ranges = cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(reflexive_set)], ',', 8),
    8282    reflexive_set_value = reflexive_set.showC(12),
     83    explicitly_defined_cp_count = len(cps),
     84    explicitly_defined_cps = cformat.multiline_fill(['0x%04x' % cp for cp in cps], ',', 8)
     85    ))
     86
     87def emit_numeric_property(f, property_code, NaN_set, cp_value_map):
     88    s = string.Template(r"""    namespace ${prop_enum_up}_ns {
     89        /** Code Point Ranges for ${prop_enum} mapping to NaN
     90        ${NaN_set_ranges}**/
     91
     92        const UnicodeSet NaN_set
     93        ${NaN_set_value};
     94
     95       const unsigned buffer_length = ${buffer_length};
     96        const static char __attribute__ ((aligned (32))) string_buffer[${allocation_length}] = u8R"__(${string_buffer})__";
     97
     98        const static std::vector<codepoint_t> defined_cps = {
     99        ${explicitly_defined_cps}};
     100        static NumericPropertyObject property_object(${prop_enum},
     101                                                    NaN_set,
     102                                                    static_cast<const char *>(string_buffer),
     103                                                    buffer_length,
     104                                                    defined_cps);
     105    }
     106""")
     107    cps = sorted(cp_value_map.keys())
     108    string_buffer = ""
     109    for cp in cps:
     110        string_buffer += cp_value_map[cp] + "\n"
     111    buffer_length = len(string_buffer.encode("utf-8"))
     112    f.write(s.substitute(prop_enum = property_code,
     113    prop_enum_up = property_code.upper(),
     114    string_buffer = string_buffer,
     115    buffer_length = buffer_length,
     116    allocation_length = (buffer_length + 255) & -256,
     117    NaN_set_ranges = cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(NaN_set)], ',', 8),
     118    NaN_set_value = NaN_set.showC(12),
    83119    explicitly_defined_cp_count = len(cps),
    84120    explicitly_defined_cps = cformat.multiline_fill(['0x%04x' % cp for cp in cps], ',', 8)
     
    203239        elif isinstance(property_object, StringPropertyObject):
    204240            emit_string_property(f, property_code, property_object.null_str_set, property_object.reflexive_set, property_object.cp_value_map)
     241        elif isinstance(property_object, NumericPropertyObject):
     242            emit_numeric_property(f, property_code, property_object.NaN_set, property_object.cp_value_map)
    205243        elif isinstance(property_object, ObsoletePropertyObject):
    206244            emit_Obsolete_property(f, property_code)
    207         else: return
     245        else:
     246            print("%s: unsupported property.")
     247            return
    208248        self.supported_props.append(property_code)
    209249
     
    253293        f = cformat.open_header_file_for_write(basename)
    254294        cformat.write_imports(f, ['"PropertyAliases.h"', '"PropertyObjects.h"', '"PropertyValueAliases.h"', '"unicode_set.h"'])
    255         prop_code_list = ['na', 'dm', 'suc', 'slc', 'stc', 'na1', 'isc']
     295        prop_code_list = ['na', 'dm', 'suc', 'slc', 'stc', 'na1', 'isc', 'nv']
    256296        f.write("\nnamespace UCD {\n")
    257297        for p in prop_code_list:
  • icGREP/icgrep-devel/UCD-scripts/UCD_property_objects.py

    r5668 r5670  
    133133    def getPropertyKind(self): return "Binary"
    134134
    135     def setID(self, prop_code, long_name):
    136         PropertyObject.setID(self, prop_code, long_name)
    137 
    138135    def addDataRecord(self, cp_lo, cp_hi, v):
    139136        if v==None or v in self.property_value_lookup_map[v] == 'Y':
     
    147144    def __init__(self):
    148145        PropertyObject.__init__(self)
     146        self.cp_value_map = {}
     147        self.NaN_set = empty_uset()
    149148
    150149    def getPropertyKind(self): return "Numeric"
     150
     151    def addDataRecord(self, cp_lo, cp_hi, stringValue):
     152        if stringValue == '':
     153            self.NaN_set = uset_union(self.NaN_set, range_uset(cp_lo, cp_hi))
     154        else:
     155            for cp in range(cp_lo, cp_hi+1):
     156                self.cp_value_map[cp] = stringValue
     157
     158    def finalizeProperty(self):
     159        explicitly_defined_cps = empty_uset()
     160        for cp in self.cp_value_map.keys():
     161            explicitly_defined_cps = uset_union(explicitly_defined_cps, singleton_uset(cp))
     162        # set NaN default
     163        self.NaN_set = uset_union(self.NaN_set, uset_complement(explicitly_defined_cps))
     164
    151165
    152166class ExtensionPropertyObject(PropertyObject):
Note: See TracChangeset for help on using the changeset viewer.