Changeset 4183


Ignore:
Timestamp:
Sep 21, 2014, 6:29:35 PM (4 years ago)
Author:
cameron
Message:

add multiline_fill, move formatting to cformat.py, add property_kind

Location:
proto/charsetcompiler/UCD
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_properties.py

    r4181 r4183  
    1010#
    1111#
    12 import re, string, os.path
     12import re, string, os.path, cformat
    1313from unicode_set import *
    1414
     
    2525#  PropertyAliases.txt
    2626#
     27UCD_property_section_regexp = re.compile("^#\s*([-A-Za-z_0-9]+)\s*Properties\s*$")
    2728UCD_property_alias_regexp = re.compile("^([-A-Za-z_0-9]+)\s*;\s*([-A-Za-z_0-9]+)([^#]*)")
    2829
     
    3132   full_name_map = {}
    3233   property_lookup_map = {}
     34   property_kind_map = {}
     35   property_kind = "unspecified"
    3336   f = open(UCD_dir + "/" + 'PropertyAliases.txt')
    3437   lines = f.readlines()
    3538   for t in lines:
     39      m = UCD_property_section_regexp.match(t)
     40      if m:
     41        property_kind = m.group(1)
    3642      if UCD_skip.match(t): continue  # skip comment and blank lines
    3743      m = UCD_property_alias_regexp.match(t)
     
    4652      property_lookup_map[canonicalize(prop_preferred_full_name)] = prop_enum
    4753      for a in prop_aliases: property_lookup_map[canonicalize(a)] = prop_enum
    48    return (property_enum_name_list, full_name_map, property_lookup_map)
     54      property_kind_map[prop_enum] = property_kind
     55   return (property_enum_name_list, full_name_map, property_lookup_map, property_kind_map)
    4956
    5057trivial_name_char_re = re.compile('[-_\s]')
     
    5461   else: return c
    5562
    56 
    57 header_template = r"""#ifndef %s
    58 #define %s
    59 /*
    60  *  Copyright (c) 2014 International Characters, Inc.
    61  *  This software is licensed to the public under the Open Software License 3.0.
    62  *  icgrep is a trademark of International Characters, Inc.
    63  *
    64  *  This file is generated by UCD_properties.y - manual edits may be lost.
    65  */
    66 
    67 """
    68 
    69 
    70 
    71 def open_header_file_for_write(filename):
    72    f = open(filename + '.h', 'w')
    73    hname = filename.upper() + '_H'
    74    f.write(header_template % (hname, hname))
    75    return f
    76 
    77 def close_header_file(f):
    78    f.write("\n#endif\n")
    79    f.close()
    80 
    81 def write_imports(f, import_list):
    82    for i in import_list: f.write("#include %s\n" % i)
    83 
    8463PropertyAliases_template = r"""
    8564namespace UCD {
     
    9675"""
    9776
    98 def multiline_join(item_list, items_per_line, separator = ",", closer='', indent = 4):
    99   lines = ""
    100   sep_with_space = separator + " "
    101   while len(item_list) > items_per_line:
    102     line_items = item_list[:items_per_line]
    103     lines += (" " * indent) + sep_with_space.join(line_items) + separator + "\n"
    104     item_list = item_list[items_per_line:]
    105   lines += (" " * indent) + sep_with_space.join(item_list) + closer
    106   return lines
    10777
    10878def generate_PropertyAliases_h(property_enum_name_list, full_name_map, property_lookup_map):
    109    f = open_header_file_for_write('PropertyAliases')
    110    write_imports(f, ["<string>", "<unordered_map>"])
    111    enum_text = multiline_join(property_enum_name_list, 4, ',')
    112    full_name_text = multiline_join(['"%s"' % full_name_map[e] for e in property_enum_name_list], 2, ',')
    113    map_text = multiline_join(['{"%s", %s}' % (k, property_lookup_map[k]) for k in sorted(property_lookup_map.keys())], 2, ',')
     79   f = cformat.open_header_file_for_write('PropertyAliases')
     80   cformat.write_imports(f, ["<string>", "<unordered_map>"])
     81   #enum_text = multiline_join(property_enum_name_list, 4, ',')
     82   enum_text = cformat.multiline_fill(property_enum_name_list, ',')
     83   #full_name_text = multiline_join(['"%s"' % full_name_map[e] for e in property_enum_name_list], 2, ',')
     84   full_name_text = cformat.multiline_fill(['"%s"' % full_name_map[e] for e in property_enum_name_list], ',')
     85   #map_text = multiline_join(['{"%s", %s}' % (k, property_lookup_map[k]) for k in sorted(property_lookup_map.keys())], 2,',')
     86   map_text = cformat.multiline_fill(['{"%s", %s}' % (k, property_lookup_map[k]) for k in sorted(property_lookup_map.keys())], ',')
    11487   f.write(PropertyAliases_template % (enum_text, full_name_text, map_text))
    115    close_header_file(f)
     88   cformat.close_header_file(f)
    11689
    11790#
     
    211184
    212185def generate_PropertyValueAliases_h(property_enum_name_list, property_value_list, property_value_enum_integer, property_value_full_name_map, property_value_lookup_map):
    213    f = open_header_file_for_write('PropertyValueAliases')
    214    write_imports(f, ["<string>", "<unordered_map>", '"unicode_set.h"', '"PropertyAliases.h"'])
     186   f = cformat.open_header_file_for_write('PropertyValueAliases')
     187   cformat.write_imports(f, ["<string>", "<unordered_map>", '"unicode_set.h"', '"PropertyAliases.h"'])
    215188   #  Generate the aliases for all Binary properties.
    216    full_name_text = multiline_join(['"No"', '"Yes"'], 4, ',',  '', 6)
    217    binary_map_text = multiline_join(['{"n", N}', '{"y", Y}', '{"no", N}', '{"yes", Y}', '{"f", N}', '{"t", Y}', '{"false", N}', '{"true", Y}'], 4, ',', '', 6)
     189   full_name_text = cformat.multiline_fill(['"No"', '"Yes"'], ',', 6)
     190   binary_map_text = cformat.multiline_fill(['{"n", N}', '{"y", Y}', '{"no", N}', '{"yes", Y}', '{"f", N}', '{"t", Y}', '{"false", N}', '{"true", Y}'], ',', 6)
    218191   #
    219192   enum_text = ""
     
    227200       else:
    228201         enum_text += "  namespace %s {\n    enum value_t {\n" % p.upper()
    229          enum_text += multiline_join(property_value_list[p], 4, ',','', 6)
     202         #enum_text += multiline_join(property_value_list[p], 4, ',','', 6)
     203         enum_text += cformat.multiline_fill(property_value_list[p], ',', 6)
    230204         if p == 'ccc': # Special case: add numeric value information for ccc.
    231205           enum_text += r"""
     
    233207    const uint8_t enum_val[] = {
    234208"""
    235            enum_text += multiline_join(["%s" % (property_value_enum_integer[p][e]) for e in property_value_list['ccc']], 4, ',', '', 6)
     209           #enum_text += multiline_join(["%s" % (property_value_enum_integer[p][e]) for e in property_value_list['ccc']], 4, ',', '', 6)
     210           enum_text += cformat.multiline_fill(["%s" % (property_value_enum_integer[p][e]) for e in property_value_list['ccc']], ',', 6)
    236211         enum_text += "};\n  }\n"
    237          full_name_text = multiline_join(['"%s"' % (property_value_full_name_map[p][e]) for e in property_value_list[p]], 4, ',',  '', 6)
     212         #full_name_text = multiline_join(['"%s"' % (property_value_full_name_map[p][e]) for e in property_value_list[p]], 4, ',',  '', 6)
     213         full_name_text = cformat.multiline_fill(['"%s"' % (property_value_full_name_map[p][e]) for e in property_value_list[p]], ',', 6)
    238214         name_vectors.append("    {%s}" % full_name_text)
    239          map_text = multiline_join(['{"%s", %s::%s}' % (k, p.upper(), property_value_lookup_map[p][k]) for k in sorted(property_value_lookup_map[p].keys())], 4, ',', '', 6)
     215         #map_text = multiline_join(['{"%s", %s::%s}' % (k, p.upper(), property_value_lookup_map[p][k]) for k in sorted(property_value_lookup_map[p].keys())], 4, ',', '', 6)
     216         map_text = cformat.multiline_fill(['{"%s", %s::%s}' % (k, p.upper(), property_value_lookup_map[p][k]) for k in sorted(property_value_lookup_map[p].keys())], ',', 6)
    240217         alias_maps.append("    {%s}" % map_text)
    241218     else:
     
    243220       alias_maps.append("    {}")
    244221   f.write(PropertyValueAliases_template % (enum_text, ",\n".join(name_vectors), ",\n".join(alias_maps)))
    245    close_header_file(f)
    246 
    247 
    248 
     222   cformat.close_header_file(f)
    249223
    250224
    251225def generate_PropertyValueSets_h(property_enum_name_list, property_value_list, property_value_enum_integer, property_value_full_name_map, property_value_lookup_map):
    252    f = open_header_file_for_write('PropertyValueSets')
    253    write_imports(f, ["<vector>", '"unicode_set.h"'])
     226   f = cformat.open_header_file_for_write('PropertyValueSets')
     227   cformat.write_imports(f, ["<vector>", '"unicode_set.h"'])
    254228   vec_decl_list = []
    255229   for p in property_enum_name_list:
     
    262236     else:
    263237       vec_decl_list.append("vector<UnicodeSet>(%i)" % len(property_value_list[p]))
    264    f.write(PropertyValues_template % (multiline_join(vec_decl_list, 4, ',', '', 6)))
    265    close_header_file(f)
     238   f.write(PropertyValues_template % (cformat.multiline_fill(vec_decl_list, ',', 6)))
     239   cformat.close_header_file(f)
    266240
    267241
     
    301275def generate_property_value_file(filename_root, property_code, canonical_property_value_map):
    302276   (prop_values, value_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', canonical_property_value_map)
    303    f = open_header_file_for_write(os.path.basename(filename_root))
    304    write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"', '"PropertyValueAliases.h"', '"PropertyValueSets.h"'])
     277   f = cformat.open_header_file_for_write(os.path.basename(filename_root))
     278   cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"', '"PropertyValueAliases.h"', '"PropertyValueSets.h"'])
    305279   f.write("\nusing namespace UCD;\n\n")
    306280   print "%s bytes" % sum([value_map[v].bytes() for v in value_map.keys()])
    307281   for v in prop_values:
    308282     f.write(value_map[v].showC('value_sets[%s][%s::%s]' % (property_code, property_code.upper(), v)))
    309    close_header_file(f)
     283   cformat.close_header_file(f)
    310284   
    311285def generate_binary_properties_file(filename_root, canonical_property_name_map):
    312286   (props, prop_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', canonical_property_name_map)
    313    f = open_header_file_for_write(os.path.basename(filename_root))
    314    write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"', '"PropertyValueSets.h"'])
     287   f = cformat.open_header_file_for_write(os.path.basename(filename_root))
     288   cformat.write_imports(f, ["<vector>", '"unicode_set.h"', '"PropertyAliases.h"', '"PropertyValueSets.h"'])
    315289   f.write("\nusing namespace UCD;\n\n")
    316290   print "%s bytes" % sum([prop_map[p].bytes() for p in prop_map.keys()])
    317291   for p in sorted(props):
    318292     f.write(prop_map[p].showC('value_sets[%s][0]' % (p)))
    319    close_header_file(f)
     293   cformat.close_header_file(f)
    320294     
    321295def generate_ScriptExtensions_h():
    322296   (scx_sets, scx_map) = parse_UCD_codepoint_name_map('ScriptExtensions.txt')
    323297   map2 = {}
    324    f = open_header_file_for_write('ScriptExtensions')
    325    write_imports(f, ["<vector>", '"PropertyAliases.h"', '"PropertyValueAliases.h"', '"unicode_set.h"'])
     298   f = cformat.open_header_file_for_write('ScriptExtensions')
     299   cformat.write_imports(f, ["<vector>", '"PropertyAliases.h"', '"PropertyValueAliases.h"', '"unicode_set.h"'])
    326300   f.write("\nusing namespace UCD;\n\n")
    327301   for scx_list in scx_sets:
     
    334308   for k in sorted(map2.keys()):
    335309     f.write(map2[k].showC('value_sets[scx][SC::%s]' % k.lower()))
    336    close_header_file(f)
     310   cformat.close_header_file(f)
    337311
    338312
     
    340314def UCD_main():
    341315   # First parse all property names and their aliases
    342    (property_enum_name_list, full_name_map, property_lookup_map) = parse_PropertyAlias_txt()
     316   (property_enum_name_list, full_name_map, property_lookup_map, property_kind_map) = parse_PropertyAlias_txt()
     317   for k in property_kind_map.keys(): print "%sProperty(UCD::%s)" % (property_kind_map[k], k)
    343318   generate_PropertyAliases_h(property_enum_name_list, full_name_map, property_lookup_map)
    344319   # Next parse all property value names and their aliases
  • proto/charsetcompiler/UCD/unicode_set.py

    r4178 r4183  
    77#
    88# Licensed under Open Software License 3.0.
    9 import re
    10 
     9import re, cformat
    1110#
    1211# Unicode Sparse Bitset Representation
     
    6261
    6362   # printing
    64    def showC(self, name, indent = 8, entries_per_line = 4):
     63   def showC(self, name, indent = 4):
    6564      hex_specifier =  "%%#0%ix" % (quad_bits/4 + 2)
    6665      runtype = {-1:"Full", 0:"Empty", 1: "Mixed"}
     66      formatted_runs = ['{%s, %i}' % (runtype[r[0]], r[1]) for r in self.runs]
     67      formatted_quads = [hex_specifier % q for q in self.quads]
    6768      setrep = (" " * indent) + ("%s.runs = {" % name)
    68       if len(self.runs) >= entries_per_line: setrep += "\n" + (" " * (indent+1))
    69       setrep += '{%s, %i}' % (runtype[self.runs[0][0]], self.runs[0][1])
    70       for i in range(1, len(self.runs)):
    71          setrep += ', '
    72          if i % entries_per_line == 0: setrep += "\n" + (" " * (indent+1))
    73          setrep += '{%s, %i}' % (runtype[self.runs[i][0]], self.runs[i][1])
     69      setrep += cformat.multiline_fill(formatted_runs, ',', indent)
    7470      setrep += '};\n'
    75       setrep += (" " * indent) + ("%s.quads = {" % name)
    76       if len(self.quads) >= entries_per_line: setrep += "\n" + (" " * (indent+1))
    77       if self.quads != []:
    78          setrep += hex_specifier % self.quads[0]
    79          for i in range(1, len(self.quads)):
    80             setrep += ', '
    81             if i % entries_per_line == 0: setrep += "\n" + (" " * (indent+1))
    82             setrep += hex_specifier % (self.quads[i])
     71      setrep += (" " * indent) + ("%s.quads = {\n" % name)
     72      setrep += cformat.multiline_fill(formatted_quads, ',', indent)
    8373      setrep += '};\n'
    8474      return setrep
Note: See TracChangeset for help on using the changeset viewer.