Ignore:
Timestamp:
Dec 27, 2014, 7:10:44 PM (4 years ago)
Author:
cameron
Message:

Parsing enumerated properties now adds default data from @missing specs

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_properties.py

    r4363 r4364  
    8888#
    8989UCD_skip = re.compile("^#.*$|^\s*$")
     90UCD_missing_regexp1 = re.compile("^#\s*@missing:\s*([0-9A-F]{4,6})[.][.]([0-9A-F]{4,6})\s*;\s*([-A-Za-z0-9_]+)\s*(?:[;#]|$)")
    9091UCD_point_name_regexp = re.compile("^([0-9A-F]{4,6})\s*;\s*((?:[-A-Za-z0-9_]+\s+)*[-A-Za-z0-9_]+)\s*(?:[;#]|$)")
    9192UCD_range_name_regexp = re.compile("^([0-9A-F]{4,6})[.][.]([0-9A-F]{4,6})\s*;\s*((?:[-A-Za-z0-9_]+\s+)*[-A-Za-z0-9_]+)\s*(?:[;#]|$)")
     93
     94def parse_UCD_enumerated_property_map(mapfile, canonical_name_lookup_map):
     95   value_map = {}
     96   name_list_order = []
     97   missingSpecFound = False
     98   f = open(UCD_dir + "/" + mapfile)
     99   lines = f.readlines()
     100   for t in lines:
     101      if UCD_skip.match(t):
     102        m = UCD_missing_regexp1.match(t)
     103        if m:
     104          if missingSpecFound:
     105            raise Exception("@missing duplicate spec: %s" % t)
     106          missingSpecFound = True
     107          (missing_lo, missing_hi, default_value) = (int(m.group(1), 16), int(m.group(2), 16), m.group(3))
     108          default_value = canonicalize(default_value)
     109          if not canonical_name_lookup_map.has_key(default_value):  raise Exception("Unknown defauly property value name '%s'" % default_value)
     110          if missing_lo != 0 or missing_hi != 0x10FFFF: raise Exception("Unexpected missing data range '%x, %x'" % (missing_lo, missing_hi))
     111          default_value = canonical_name_lookup_map[default_value]
     112        continue  # skip comment and blank lines
     113      m = UCD_point_name_regexp.match(t)
     114      if m:
     115        (codepoint, name) = (int(m.group(1), 16), m.group(2))
     116        newset = singleton_uset(codepoint)
     117      else:
     118        m = UCD_range_name_regexp.match(t)
     119        if not m: raise Exception("Unknown syntax: %s" % t)
     120        (cp_lo, cp_hi, name) = (int(m.group(1), 16), int(m.group(2), 16), m.group(3))
     121        newset = range_uset(cp_lo, cp_hi)
     122      cname = canonicalize(name)
     123      if not canonical_name_lookup_map.has_key(cname):  raise Exception("Unknown property or property value name '%s'" % cname)
     124      name = canonical_name_lookup_map[cname]
     125      if not value_map.has_key(name):
     126        value_map[name] = newset
     127        name_list_order.append(name)
     128      else: value_map[name] = uset_union(value_map[name], newset)
     129   explicitly_defined_cps = empty_uset()
     130   if missingSpecFound:
     131     for k in value_map.keys(): explicitly_defined_cps = uset_union(explicitly_defined_cps, value_map[k])
     132     need_default_value = uset_complement(explicitly_defined_cps)
     133     if value_map.has_key(default_value):
     134       value_map[default_value] = uset_union(value_map[default_value], need_default_value)
     135     else:
     136       value_map[default_value] = need_default_value
     137       name_list_order.append(default_value)
     138   return (name_list_order, value_map)
    92139
    93140def parse_UCD_codepoint_name_map(mapfile, canonical_name_lookup_map = None):
     
    97144   lines = f.readlines()
    98145   for t in lines:
    99       if UCD_skip.match(t): continue  # skip comment and blank lines
     146      if UCD_skip.match(t):
     147        continue  # skip comment and blank lines
    100148      m = UCD_point_name_regexp.match(t)
    101149      if m:
     
    270318    def generate_property_value_file(self, filename_root, property_code):
    271319       canonical_property_value_map = self.property_value_lookup_map[property_code]
    272        (prop_values, value_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', canonical_property_value_map)
     320       (prop_values, value_map) = parse_UCD_enumerated_property_map(filename_root + '.txt', canonical_property_value_map)
    273321       for v in self.property_value_list[property_code]:
    274322          if not v in prop_values:
    275323             #raise Exception("Property %s value %s missing" % (self.full_name_map[property_code], v))
    276              print("Property %s value %s missing" % (self.full_name_map[property_code], v))
     324             print("Warning property %s has no instance of value %s" % (self.full_name_map[property_code], v))
    277325             value_map[v] = empty_uset()
    278326       basename = os.path.basename(filename_root)
Note: See TracChangeset for help on using the changeset viewer.