Ignore:
Timestamp:
Oct 11, 2017, 6:48:36 PM (18 months ago)
Author:
cameron
Message:

Update to UCD 10.0.0

Location:
icGREP/icgrep-devel/UCD-scripts
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/UCD-scripts/UCD_config.py

    r5652 r5686  
    66#
    77#
    8 UCD_src_dir = "UCD9.0"
     8UCD_src_dir = "UCD-10.0"
    99
    1010UCD_output_dir = "generated"
  • icGREP/icgrep-devel/UCD-scripts/UCD_properties.py

    r5685 r5686  
    599599    ucd.generate_property_value_file('auxiliary/WordBreakProperty', 'WB')
    600600    #
     601    # Vertical orientation property
     602    ucd.generate_property_value_file('VerticalOrientation', 'vo')
     603
    601604    # East Asian Width - can use either source
    602605    ucd.generate_property_value_file('EastAsianWidth', 'ea')
  • icGREP/icgrep-devel/UCD-scripts/generate_UCD_tests.py

    r5653 r5686  
    2424
    2525    def load_property_name_info(self):
    26         (self.property_enum_name_list, self.full_name_map, self.property_lookup_map, self.property_kind_map) = parse_PropertyAlias_txt()
     26        (self.property_enum_name_list, self.property_object_map) = parse_PropertyAlias_txt()
     27        self.property_lookup_map = getPropertyLookupMap(self.property_object_map)
     28        self.full_name_map = {}
     29        for p in self.property_enum_name_list:
     30            self.full_name_map[p] = self.property_object_map[p].getPropertyFullName()
    2731
    2832    def load_property_value_info(self):
    29         (self.property_value_list, self.property_value_enum_integer, self.property_value_full_name_map, self.property_value_lookup_map, self.missing_specs) = parse_PropertyValueAlias_txt(self.property_lookup_map)
     33        initializePropertyValues(self.property_object_map, self.property_lookup_map)
    3034
    31     def load_enumerated_property_data(self, filename_root, property_code):
    32         vlist = self.property_value_list[property_code]
    33         canon_map = self.property_value_lookup_map[property_code]
    34         (prop_values, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt')
    35         self.enum_value_map[property_code] = value_map
     35    def load_property_value_file(self, filename_root, property_code):
     36        property_object = self.property_object_map[property_code]
     37        parse_property_data(self.property_object_map[property_code], filename_root + '.txt')
    3638
    3739    def load_ScriptExtensions_data(self):
    38         filename_root = 'ScriptExtensions'
    3940        property_code = 'scx'
    40         vlist = self.property_value_list['sc']
    41         (prop_values, value_map) = parse_ScriptExtensions_txt(vlist, self.property_value_lookup_map['sc'])
    42         self.enum_value_map['scx'] = value_map
     41        extension_object = self.property_object_map['scx']
     42        extension_object.setBaseProperty(self.property_object_map['sc'])
     43        parse_property_data(extension_object, 'ScriptExtensions.txt')
    4344       
    44     def load_binary_properties_data(self, filename_root):
    45         (props, prop_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', self.property_lookup_map)
    46         for p in props:
    47             self.binary_value_map[p] = prop_map[p]
     45    def load_multisection_properties_file(self, filename_root):
     46        props = parse_multisection_property_data(filename_root + '.txt', self.property_object_map, self.property_lookup_map)
     47        for p in sorted(props):
     48            property_object = self.property_object_map[p]
    4849
    4950    def load_others(self):
     
    5152        self.binary_value_map['ANY'] = range_uset(0, 0x10FFFF)
    5253        self.binary_value_map['ASCII'] = range_uset(0, 0x7F)
    53         self.binary_value_map['ASSIGNED'] = uset_complement(self.enum_value_map['gc']['Cn'])     
    54         self.binary_value_map['White_Space'] = self.binary_value_map['WSpace']
    55         self.binary_value_map['Uppercase'] = self.binary_value_map['Upper']
    56         self.binary_value_map['Lowercase'] = self.binary_value_map['Lower']
    57         self.binary_value_map['Alphabetic'] = self.binary_value_map['Alpha']
    58         self.binary_value_map['Noncharacter_Code_Point'] = self.binary_value_map['NChar']
    59         self.binary_value_map['Default_Ignorable_Code_Point'] = self.binary_value_map['DI']
     54        self.binary_value_map['ASSIGNED'] = uset_complement(self.property_object_map['gc'].value_map['Cn'])     
     55        self.binary_value_map['White_Space'] = self.property_object_map['WSpace'].value_map['Y']
     56        self.binary_value_map['Uppercase'] = self.property_object_map['Upper'].value_map['Y']
     57        self.binary_value_map['Lowercase'] = self.property_object_map['Lower'].value_map['Y']
     58        self.binary_value_map['Alphabetic'] = self.property_object_map['Alpha'].value_map['Y']
     59        self.binary_value_map['Noncharacter_Code_Point'] = self.property_object_map['NChar'].value_map['Y']
     60        self.binary_value_map['Default_Ignorable_Code_Point'] = self.property_object_map['DI'].value_map['Y']
    6061
    6162    def load_all(self):
     
    6768        #
    6869        # The Block property
    69         self.load_enumerated_property_data('Blocks', 'blk')
     70        self.load_property_value_file('Blocks', 'blk')
    7071        #
    7172        # Scripts
    72         self.load_enumerated_property_data('Scripts', 'sc')
     73        self.load_property_value_file('Scripts', 'sc')
    7374        #
    7475        # Script Extensions
     
    7677        #
    7778        # General Category
    78         self.load_enumerated_property_data('extracted/DerivedGeneralCategory', 'gc')
     79        self.load_property_value_file('extracted/DerivedGeneralCategory', 'gc')
    7980        #
    8081        # Core Properties
    81         self.load_binary_properties_data('DerivedCoreProperties')
     82        self.load_multisection_properties_file('DerivedCoreProperties')
    8283        #
    83         self.load_binary_properties_data('PropList')
     84        self.load_multisection_properties_file('PropList')
    8485        self.load_others()
    8586
     
    9697                terms.append(template % (lbl, p, uset_popcount(uset_intersection(self.all_good_set, s))))
    9798        if 'gc' in propgroups:
    98             for v in self.property_value_list['gc']:
    99                 s = self.enum_value_map['gc'][v]
     99            obj = self.property_object_map['gc']
     100            for v in obj.name_list_order:
     101                s = obj.value_map[v]
    100102                lbl = 'p'
    101103                if randint(1,10) <= negated_per_10:
     
    104106                terms.append(template % (lbl, v, uset_popcount(uset_intersection(self.all_good_set, s))))
    105107        if 'sc' in propgroups:
    106             for v in self.property_value_list['sc']:
    107                 s = self.enum_value_map['sc'][v]
    108                 vname = self.property_value_full_name_map['sc'][v]
     108            obj = self.property_object_map['sc']
     109            for v in obj.name_list_order:
     110                s = obj.value_map[v]
     111                vname = obj.property_value_full_name_map[v]
    109112                lbl = 'p'
    110113                if randint(1,10) <= negated_per_10:
     
    113116                terms.append(template % (lbl, vname, uset_popcount(uset_intersection(self.all_good_set, s))))
    114117        if 'scx' in propgroups:
    115             for v in self.property_value_list['sc']:
    116                 s = self.enum_value_map['scx'][v]
    117                 vname = self.property_value_full_name_map['sc'][v]
     118            for v in self.property_object_map['sc'].name_list_order:
     119                s = self.property_object_map['scx'][v]
     120                vname = self.property_object_map['sc'].property_value_full_name_map[v]
    118121                lbl = 'p'
    119122                if randint(1,10) <= negated_per_10:
     
    127130        (p2, t2) = a2
    128131        op = randint(0,2)
    129         s1 = self.enum_value_map[p1][t1]
     132        s1 = self.property_object_map[p1].value_map[t1]
    130133        if p2 == 'others':
    131134            s2 = self.binary_value_map[t2]
    132         else: s2 = self.enum_value_map[p2][t2]
     135        else: s2 = self.property_object_map[p2].value_map[t2]
    133136        if op == 0: s3 = uset_intersection(s1, s2)
    134137        elif op == 1: s3 = uset_difference(s1, s2)
    135138        elif op == 2: s3 = uset_union(s1, s2)
    136139        s3 = uset_intersection(s3, self.all_good_set)
    137         if p1 == 'sc' or p1 == 'scx': t1 = self.property_value_full_name_map['sc'][t1]
    138         if p2 == 'sc' or p2 == 'scx': t2 = self.property_value_full_name_map['sc'][t2]
     140        if p1 == 'sc' or p1 == 'scx': t1 = self.property_object_map['sc'].property_value_full_name_map[t1]
     141        if p2 == 'sc' or p2 == 'scx': t2 = self.property_object_map['sc'].property_value_full_name_map[t2]
    139142        if p1 == 'scx': t1 = 'scx=' + t1
    140143        if p2 == 'scx': t2 = 'scx=' + t2
     
    152155
    153156    def generate_random_property_expressions(self, useLookbehindAssertions = False):
    154         gc = self.property_value_list['gc']
    155         sc = self.property_value_list['sc']
     157        gc = self.property_object_map['gc'].name_list_order
     158        sc = self.property_object_map['sc'].name_list_order
    156159        others = ['Alphabetic', 'Uppercase', 'Lowercase', 'White_Space', 'Noncharacter_Code_Point', 'Default_Ignorable_Code_Point', 'ANY', 'ASCII', 'ASSIGNED']
    157160        exprs = []
Note: See TracChangeset for help on using the changeset viewer.