Changeset 5653 for icGREP


Ignore:
Timestamp: Sep 29, 2017, 9:59:40 AM (19 months ago)
Author: cameron
Message:

Updates for Python 3; some refactoring

Location: icGREP/icgrep-devel/UCD-scripts
Files: 5 edited

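Legend:

Unmodified (row shows both the old and the new line number, run together in this text rendering: "2121" is old line 21 / new line 21)
Added (row shows only the new line number)
Removed (row shows only the old line number)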
  • icGREP/icgrep-devel/UCD-scripts/UCD_parser.py

    r5652 r5653  
    2121        if m:
    2222            UCD_config.version = m.group(1)
    23             print "Version %s" % m.group(1)
     23            print("Version %s" % m.group(1))
    2424
    2525trivial_name_char_re = re.compile('[-_\s]')
     
    101101                if m.group(1) != '0000' or m.group(2) != '10FFFF': raise Exception("Bad missing spec: " + s)
    102102                cname = canonicalize(m.group(3))
    103                 if not property_lookup_map.has_key(cname): raise Exception("Bad missing property: " + s)
     103                if not cname in property_lookup_map: raise Exception("Bad missing property: " + s)
    104104                missing_specs[property_lookup_map[cname]] = m.group(4)
    105105            continue  # skip comment and blank lines
     
    107107        if not m: raise Exception("Unknown property value alias syntax: %s" % t)
    108108        prop_code = canonicalize(m.group(1))
    109         if not property_lookup_map.has_key(prop_code): raise Exception("Property code: '%s' is unknown" % prop_code)
     109        if not prop_code in property_lookup_map: raise Exception("Property code: '%s' is unknown" % prop_code)
    110110        else: prop_code = property_lookup_map[prop_code]
    111         if not property_value_list.has_key(prop_code):
     111        if not prop_code in property_value_list:
    112112            property_value_list[prop_code] = []
    113113            property_value_enum_integer[prop_code] = {}
     
    177177# Ensure that the default value for the property is first in the list of property values,
    178178# and that all codepoints not explicitly identified in the file are mapped to this default.
    179 def parse_UCD_enumerated_property_map(property_code, vlist, canon_map, mapfile, default_value = None):
     179def parse_UCD_enumerated_property_map(property_code, vlist, canon_map, mapfile):
    180180    value_map = {}
    181181    for v in vlist: value_map[v] = empty_uset()
    182     if default_value == None:
    183         name_list_order = []
    184     else:
    185         # Default value must always be first in the final enumeration order.
    186         name_list_order = [default_value]
     182    name_list_order = []
     183    default_specs = []
    187184    f = open(UCD_config.UCD_src_dir + "/" + mapfile)
    188185    lines = f.readlines()
     
    191188            m = UCD_missing_regexp1.match(t)
    192189            if m:
    193                 if default_value != None:
    194                     raise Exception("Default value already specified, extraneous @missing spec: %s" % t)
    195190                (missing_lo, missing_hi, default_value) = (int(m.group(1), 16), int(m.group(2), 16), m.group(3))
    196191                default_value = canonicalize(default_value)
    197                 if not canon_map.has_key(default_value):  raise Exception("Unknown default property value name '%s'" % default_value)
     192                if not default_value in canon_map:  raise Exception("Unknown default property value name '%s'" % default_value)
    198193                if missing_lo != 0 or missing_hi != 0x10FFFF: raise Exception("Unexpected missing data range '%x, %x'" % (missing_lo, missing_hi))
    199194                default_value = canon_map[default_value]
     
    202197                if default_value in name_list_order: name_list_order.remove(default_value)
    203198                name_list_order = [default_value] + name_list_order
     199                default_specs.append((missing_lo, missing_hi, default_value))
    204200            continue  # skip comment and blank lines
    205201        m = UCD_point_name_regexp.match(t)
     
    213209            newset = range_uset(cp_lo, cp_hi)
    214210        cname = canonicalize(name)
    215         if not canon_map.has_key(cname):  raise Exception("Unknown property or property value name '%s'" % cname)
     211        if not cname in canon_map:  raise Exception("Unknown property or property value name '%s'" % cname)
    216212        name = canon_map[cname]
    217213        if not name in name_list_order:
    218214            name_list_order.append(name)
    219215        value_map[name] = uset_union(value_map[name], newset)
     216    for (default_lo, default_hi, default_val) in default_specs:
     217        value_map = add_Default_Values(value_map, default_lo, default_hi, default_val)
     218    return (name_list_order, value_map)
     219
     220def add_Default_Values(value_map, default_lo, default_hi, default_val):
     221    default_region = range_uset(default_lo, default_hi)
    220222    explicitly_defined_cps = empty_uset()
    221223    for k in value_map.keys(): explicitly_defined_cps = uset_union(explicitly_defined_cps, value_map[k])
    222     need_default_value = uset_complement(explicitly_defined_cps)
    223     if default_value != None:
    224         value_map[default_value] = uset_union(value_map[default_value], need_default_value)
    225     elif uset_popcount(need_default_value) > 0:
    226         print "Warning no default value, but %i codepoints not specified" % uset_popcount(need_default_value)
    227     return (name_list_order, value_map)
     224    need_default_value = uset_difference(default_region, explicitly_defined_cps)
     225    if default_val in value_map:
     226        value_map[default_val] = uset_union(value_map[default_val], need_default_value)
     227    else:
     228        value_map[default_val] = need_default_value
     229    return value_map
    228230
    229231def parse_ScriptExtensions_txt(scripts, canon_map):
     
    239241            # sc = canonical_property_value_map[canonicalize(scx)]
    240242            sc = scx
    241             if value_map.has_key(sc):
     243            if sc in value_map:
    242244                value_map[sc] = uset_union(value_map[sc], scx_set_map[scx_list])
    243245            else: value_map[sc] = scx_set_map[scx_list]
    244246        explicitly_defined_set = uset_union(explicitly_defined_set, scx_set_map[scx_list])
    245247    for v in scripts:
    246         if value_map.has_key(v):
     248        if v in value_map:
    247249            value_map[v] = uset_union(value_map[v], uset_difference(script_map[v], explicitly_defined_set))
    248         elif script_map.has_key(v):
     250        elif v in script_map:
    249251            value_map[v] = script_map[v]
    250252        else: value_map[v] = empty_uset()
     
    271273        if not canon_map == None:
    272274            cname = canonicalize(name)
    273             if not canon_map.has_key(cname):
     275            if not cname in canon_map:
    274276                raise Exception("Unknown property or property value name '%s'" % cname)
    275277            name = canon_map[cname]
    276         if not value_map.has_key(name):
     278        if not name in value_map:
    277279            value_map[name] = newset
    278280            name_list_order.append(name)
  • icGREP/icgrep-devel/UCD-scripts/UCD_properties.py

    r5652 r5653  
    4747        self.property_data_headers = []
    4848        self.missing_specs = {}
     49        self.binary_properties = {}
    4950
    5051    def load_property_name_info(self):
     
    7879        #
    7980        for p in self.property_enum_name_list:
    80            if self.property_value_list.has_key(p):
     81           if p in self.property_value_list:
    8182              if not self.property_kind_map[p] == 'Binary':
    8283                  enum_text = cformat.multiline_fill(self.property_value_list[p], ',', 12)
     
    99100        cformat.close_header_file(f)
    100101
    101  
    102102    def generate_property_value_file(self, filename_root, property_code):
    103         canon_map = self.property_value_lookup_map[property_code]
    104         if self.missing_specs.has_key(property_code):
    105             default_value = canon_map[canonicalize(self.missing_specs[property_code])]
    106         else: default_value = None
    107103        vlist = self.property_value_list[property_code]
    108104        canon_map = self.property_value_lookup_map[property_code]
    109         (prop_values, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt', default_value)
     105        (prop_values, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt')
     106        canon_map = self.property_value_lookup_map[property_code]
     107        if property_code in self.missing_specs:
     108            default_value = canon_map[canonicalize(self.missing_specs[property_code])]
     109            value_map = add_Default_Values(value_map, 0, 0x10FFFF, default_value)
    110110        independent_prop_values = len(prop_values)
    111111        for v in vlist:
     
    116116        #
    117117        self.property_value_list[property_code] = prop_values
    118         basename = os.path.basename(filename_root)
    119         f = cformat.open_header_file_for_write(os.path.basename(filename_root))
    120         cformat.write_imports(f, ['"PropertyObjects.h"', '"PropertyValueAliases.h"', '"unicode_set.h"'])
    121         f.write("\nnamespace UCD {\n")
    122         f.write("  namespace %s_ns {\n" % property_code.upper())
    123         f.write("    const unsigned independent_prop_values = %s;\n" % independent_prop_values)
    124118        if property_code == 'gc':
    125119            # special logic for derived categories
     
    132126            value_map['Z'] = union_of_all([value_map[v] for v in ['Zs', 'Zl', 'Zp']])
    133127            value_map['C'] = union_of_all([value_map[v] for v in ['Cc', 'Cf', 'Cs', 'Co', 'Cn']])
     128        basename = os.path.basename(filename_root)
     129        f = cformat.open_header_file_for_write(os.path.basename(filename_root))
     130        cformat.write_imports(f, ['"PropertyObjects.h"', '"PropertyValueAliases.h"', '"unicode_set.h"'])
     131        f.write("\nnamespace UCD {\n")
     132        f.write("  namespace %s_ns {\n" % property_code.upper())
     133        f.write("    const unsigned independent_prop_values = %s;\n" % independent_prop_values)
    134134        for v in prop_values:
    135135            f.write("    /** Code Point Ranges for %s\n    " % v)
    136             f.write(cformat.multiline_fill(['[%s, %s]' % (lo, hi) for (lo, hi) in uset_to_range_list(value_map[v])], ',', 4))
     136            f.write(cformat.multiline_fill(['[%04x, %04x]' % (lo, hi) for (lo, hi) in uset_to_range_list(value_map[v])], ',', 4))
    137137            f.write("**/\n")
    138138            f.write("    const UnicodeSet %s_Set \n" % v.lower())
    139139            f.write(value_map[v].showC(8) + ";\n")
    140         print "%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()]))
     140        print("%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()])))
    141141        set_list = ['&%s_Set' % v.lower() for v in prop_values]
    142142        f.write("    static EnumeratedPropertyObject property_object\n")
     
    176176        f.write("\n        }};\n    }\n}\n")
    177177        cformat.close_header_file(f)
    178         print "%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()]))
     178        print("%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()])))
    179179        self.supported_props.append(property_code)
    180180        self.property_data_headers.append(basename)
     
    199199        f.write("}\n\n")
    200200        cformat.close_header_file(f)
    201         print "%s: %s bytes" % (basename, sum([prop_map[p].bytes() for p in prop_map.keys()]))
     201        print("%s: %s bytes" % (basename, sum([prop_map[p].bytes() for p in prop_map.keys()])))
    202202        self.supported_props += props
     203        for p in prop_map.keys(): self.binary_properties[p] = prop_map[p]
    203204        self.property_data_headers.append(basename)
    204205
     
    284285    ucd.generate_property_value_file('HangulSyllableType', 'hst')
    285286    #
    286     # Bidi_Class
    287     ucd.generate_property_value_file('extracted/DerivedBidiClass', 'bc')
    288     #
    289287    # Bidi Mirroroing from DerivedCoreProperties.txt
    290288    ucd.generate_binary_properties_file('extracted/DerivedBinaryProperties')
     
    306304    # Binary normalization properties.
    307305    ucd.generate_binary_properties_file('DerivedNormalizationProps')
     306    #
     307    # Bidi_Class
     308    ucd.generate_property_value_file('extracted/DerivedBidiClass', 'bc')
    308309
    309310    #
  • icGREP/icgrep-devel/UCD-scripts/casefold.py

    r5642 r5653  
    4242      fold_val = m.group(3)
    4343      if fold_t == 'T':
    44          print "Skipping Turkic entry"
     44         print("Skipping Turkic entry")
    4545         continue  # skip Turkic
    4646      if fold_t == 'F':
     
    4848      else:
    4949          fold_val = int(fold_val, 16)
    50       if fold_value.has_key(codepoint): fold_value[codepoint].append(fold_val)
     50      if codepoint in fold_value: fold_value[codepoint].append(fold_val)
    5151      else: fold_value[codepoint] = [fold_val]
    5252   return (fold_type, fold_value)
     
    8282      for v in folds:
    8383        if not isinstance(v, int): continue # skip nonsimple case folds
    84         if not cl_map.has_key(v): cl_map[v] = [k]
     84        if not v in cl_map: cl_map[v] = [k]
    8585        else: cl_map[v].append(k)
    86         if not cl_map.has_key(k): cl_map[k] = [v]
     86        if not k in cl_map: cl_map[k] = [v]
    8787        else: cl_map[k].append(v)
    8888   newEntries = True
     
    124124         projected = []
    125125         for (cp0, offset) in open_entries:
    126             even_odd_offset_group = (abs(cp - cp0)/ abs(offset)) & 1
     126            even_odd_offset_group = int(abs(cp - cp0)/ abs(offset)) & 1
    127127            if even_odd_offset_group == 0:
    128128               projected_foldcp = cp + offset
  • icGREP/icgrep-devel/UCD-scripts/generate_UCD_tests.py

    r5143 r5653  
    180180    ucd = UCD_test_generator()
    181181    ucd.load_all()
    182     print "<greptest>"
     182    print("<greptest>")
    183183    for t in ucd.generate_level_1_property_terms(1, ['sc', 'gc']):
    184         print t
     184        print(t)
    185185    for p in ucd.generate_random_property_expressions(True):
    186         print p
    187     print "</greptest>"
     186        print(p)
     187    print("</greptest>")
    188188
    189189if __name__ == "__main__":
  • icGREP/icgrep-devel/UCD-scripts/unicode_set.py

    r5153 r5653  
    2828quad_bits = 1 << log2_quad_bits
    2929mod_quad_bit_mask = quad_bits - 1
    30 UnicodeQuadCount = 0x110000 / quad_bits #  2**log2_quad_bits codepoints per quad
     30UnicodeQuadCount = int(0x110000 / quad_bits) #  2**log2_quad_bits codepoints per quad
    3131FullQuadMask = (1<<(quad_bits)) - 1
    3232run_bytes = 4
     
    6262   # printing
    6363   def showC(self, indent = 4):
    64       hex_specifier =  "%%#0%ix" % (quad_bits/4 + 2)
     64      hex_specifier =  "%%#0%ix" % (int(quad_bits/4) + 2)
    6565      runtype = {-1:"Full", 0:"Empty", 1: "Mixed"}
    6666      formatted_runs = ['{%s, %i}' % (runtype[r[0]], r[1]) for r in self.runs]
     
    7575
    7676   def bytes(self):
    77        return (len(self.runs) * run_bytes) + (len(self.quads) * quad_bits/8)
     77       return (len(self.runs) * run_bytes) + (len(self.quads) * int(quad_bits/8))
    7878
    7979
     
    154154
    155155def uset_member(s, codepoint):
    156    quad_no = codepoint / quad_bits
     156   quad_no = int(codepoint / quad_bits)
    157157   quad_val = 1 << (codepoint & mod_quad_bit_mask)
    158158   it = Uset_Iterator(s)   
     
    415415    f.close()
    416416    s = parse_UCD_set(lines)
    417     print s.showC(vname)
    418 
    419 
     417    print(s.showC(vname))
     418
     419
Note: See TracChangeset for help on using the changeset viewer.