Ignore:
Timestamp:
Dec 27, 2014, 8:50:24 PM (4 years ago)
Author:
cameron
Message:

Clean up default value processing for enumerated properties; uset_popcount added

Location:
proto/charsetcompiler/UCD
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_properties.py

    r4364 r4365  
    9292UCD_range_name_regexp = re.compile("^([0-9A-F]{4,6})[.][.]([0-9A-F]{4,6})\s*;\s*((?:[-A-Za-z0-9_]+\s+)*[-A-Za-z0-9_]+)\s*(?:[;#]|$)")
    9393
    94 def parse_UCD_enumerated_property_map(mapfile, canonical_name_lookup_map):
     94def parse_UCD_enumerated_property_map(mapfile, canonical_name_lookup_map, default_value = None):
    9595   value_map = {}
    9696   name_list_order = []
    97    missingSpecFound = False
    9897   f = open(UCD_dir + "/" + mapfile)
    9998   lines = f.readlines()
     
    102101        m = UCD_missing_regexp1.match(t)
    103102        if m:
    104           if missingSpecFound:
    105             raise Exception("@missing duplicate spec: %s" % t)
    106           missingSpecFound = True
     103          if default_value != None:
     104            raise Exception("Default value already specified, extraneous @missing spec: %s" % t)
    107105          (missing_lo, missing_hi, default_value) = (int(m.group(1), 16), int(m.group(2), 16), m.group(3))
    108106          default_value = canonicalize(default_value)
     
    128126      else: value_map[name] = uset_union(value_map[name], newset)
    129127   explicitly_defined_cps = empty_uset()
    130    if missingSpecFound:
    131      for k in value_map.keys(): explicitly_defined_cps = uset_union(explicitly_defined_cps, value_map[k])
    132      need_default_value = uset_complement(explicitly_defined_cps)
     128   for k in value_map.keys(): explicitly_defined_cps = uset_union(explicitly_defined_cps, value_map[k])
     129   need_default_value = uset_complement(explicitly_defined_cps)
     130   if default_value != None:
    133131     if value_map.has_key(default_value):
    134132       value_map[default_value] = uset_union(value_map[default_value], need_default_value)
     
    136134       value_map[default_value] = need_default_value
    137135       name_list_order.append(default_value)
     136   elif uset_popcount(need_default_value) > 0:
     137     print "Warning no default value, but %i codepoints not specified" % uset_popcount(need_default_value)
    138138   return (name_list_order, value_map)
    139139
     
    316316
    317317 
    318     def generate_property_value_file(self, filename_root, property_code):
     318    def generate_property_value_file(self, filename_root, property_code, default_value = None):
    319319       canonical_property_value_map = self.property_value_lookup_map[property_code]
    320        (prop_values, value_map) = parse_UCD_enumerated_property_map(filename_root + '.txt', canonical_property_value_map)
     320       (prop_values, value_map) = parse_UCD_enumerated_property_map(filename_root + '.txt', canonical_property_value_map, default_value)
    321321       for v in self.property_value_list[property_code]:
    322322          if not v in prop_values:
     
    329329       f.write("\nnamespace UCD {\n")
    330330       f.write("  namespace %s {\n" % property_code.upper())
    331        all_explicit_values = union_of_all([value_map[v] for v in self.property_value_list[property_code]])
    332        missing_values = uset_complement(all_explicit_values)
    333        if self.missing_specs.has_key(property_code):
    334          default = canonicalize(self.missing_specs[property_code])
    335          if not self.property_value_lookup_map[property_code].has_key(default): raise Exception("Cannot process default specification '%s'" % default)
    336          default_key = self.property_value_lookup_map[property_code][default]
    337          value_map[default_key] = uset_union(value_map[default_key], missing_values)   
    338331       if property_code == 'gc':
    339332         # special logic for derived categories
     
    350343         f.write(value_map[v].showC(6) + ";\n")
    351344       print "%s: %s bytes" % (basename, sum([value_map[v].bytes() for v in value_map.keys()]))
    352        if not self.missing_specs.has_key(property_code):
    353          f.write("    const UnicodeSet Missing_Set \n")
    354          f.write(missing_values.showC(6) + ";\n")
    355345       set_list = ['%s_Set' % v.lower() for v in self.property_value_list[property_code]]
    356346       f.write("    const EnumeratedPropertyObject property_object\n")
  • proto/charsetcompiler/UCD/unicode_set.py

    r4191 r4365  
    160160   it.advance(quad_no)
    161161   return (it.get_quad() & quad_val) != 0
    162  
     162
     163def uset_popcount(s):
     164    popcount = 0
     165    it = Uset_Iterator(s)
     166    while not it.at_end():
     167        (runtype, n) = it.current_run()
     168        if runtype == Empty:
     169            it.advance(n)
     170        elif runtype == Full:
     171            popcount += n * quad_bits
     172            it.advance(n)
     173        else:
     174            popcount += popcount_quad(it.get_quad)
     175            it.advance(1)
     176    return popcount
     177
     178def popcount_quad(q):
     179    c = 0
     180    while q != 0:
     181        q = q & (q - 1) # clear low bit
     182        c += 1
     183    return c
     184
    163185def uset_complement (s):
    164186   assert s.quad_count == UnicodeQuadCount
     
    178200            it.advance(1)
    179201   return iset
    180 
    181202
    182203def uset_intersection (s1, s2):
Note: See TracChangeset for help on using the changeset viewer.