source: proto/charsetcompiler/generate_UCD_property_functions.py @ 4374

Last change on this file since 4374 was 4374, checked in by cameron, 4 years ago

Generator to produce pablo files for various Unicode properties

File size: 3.5 KB
Line 
1#
2# generate_UCD_property_functions.py -
3# generating Python pablo functions for various Unicode properties
4#
5# Robert D. Cameron
6# December 28, 2014
7#
8# Licensed under Open Software License 3.0.
9#
10#
11import re, string, os.path, UCD.cformat
12from UCD.unicode_set import *
13from UCD.UCD_parser import *
14from if_hierarchy import *
15
16
17
18
# Text templates for the generated .pablo source.  Each is filled in with
# the "%" operator by UCD_generator.

# Class emitted once per (property code, property value) pair.
Unicode_CC_struct = (
    "class struct_%s_%s:\n"
    "\tcc = 0\n"
    "\n"
)

# Header of the function emitted for each pair; the three slots are
# filled with (property code, value, value).
Unicode_CC_header = (
    "def %s_%s(basis_bits, struct_%s):\n"
)

# Placeholder Main appended at the end of every generated file.
Unicode_dummy_main = (
    "\n"
    "\n"
    "def Main(basis_bits):\n"
    "    pass\n"
)

# NOTE(review): not referenced by the code visible in this file; the
# generator methods build their own "struct_<property>_%s.cc" template.
Unicode_template_var = "struct_%s.cc"
26
27
28
29
30
31
32
class UCD_generator():
    """Generate ``.pablo`` character-class files from Unicode data tables.

    Typical use: create an instance, call load_property_name_info() and
    then load_property_value_info() (the second reads an attribute set by
    the first), and finally call one or more of the generate_* methods.
    """

    def __init__(self):
        pass

    def load_property_name_info(self):
        """Store the four results of parse_PropertyAlias_txt() on self.

        Sets property_enum_name_list, full_name_map, property_lookup_map
        and property_kind_map.
        """
        (self.property_enum_name_list,
         self.full_name_map,
         self.property_lookup_map,
         self.property_kind_map) = parse_PropertyAlias_txt()

    def load_property_value_info(self):
        """Store the five results of parse_PropertyValueAlias_txt() on self.

        Requires self.property_lookup_map, so load_property_name_info()
        must have been called first.  Sets property_value_list,
        property_value_enum_integer, property_value_full_name_map,
        property_value_lookup_map and missing_specs.
        """
        (self.property_value_list,
         self.property_value_enum_integer,
         self.property_value_full_name_map,
         self.property_value_lookup_map,
         self.missing_specs) = parse_PropertyValueAlias_txt(self.property_lookup_map)

    def _write_pablo_file(self, filename_root, property_code, value_map):
        """Write <basename(filename_root)>.pablo with one definition per value.

        For every key of value_map the struct class, the function header
        and the output of generateCharClassDefs() are written, followed by
        the dummy Main.  The file is created relative to the current
        working directory because only the basename of filename_root is
        used.
        """
        basename = os.path.basename(filename_root)
        # Loop-invariant: only the property code is substituted here; the
        # remaining "%s" is left in the string passed to generateCharClassDefs.
        template_var = "struct_%s_%%s.cc" % property_code
        # NOTE(review): the file is opened in binary mode but str values are
        # written, which only works on Python 2 -- confirm before porting.
        f = open(basename + ".pablo", "wb")
        try:
            for k in value_map:
                f.write(Unicode_CC_struct % (property_code, k))
                # NOTE(review): the header names its parameter struct_<value>
                # while the class and template variable use
                # struct_<property>_<value> -- confirm this is intended.
                f.write(Unicode_CC_header % (property_code, k, k))
                f.write(generateCharClassDefs(defaultIfRangeList,
                                              {k: uset_to_range_list(value_map[k])},
                                              template_var))
            f.write(Unicode_dummy_main)
        except Exception:
            # Do not leak the handle when generation fails part-way.
            f.close()
            raise
        # NOTE(review): `cformat` is used unqualified although the visible
        # import is `import UCD.cformat`; presumably a star import supplies
        # the name -- confirm.
        cformat.close_header_file(f)

    def generate_property_file(self, filename_root, property_code, default_value = None):
        """Generate the .pablo file for one enumerated property.

        filename_root  -- data file path without the '.txt' suffix; its
                          basename also names the output file.
        property_code  -- key used to index property_value_lookup_map,
                          property_value_list and full_name_map.
        default_value  -- passed through unchanged to
                          parse_UCD_enumerated_property_map().

        Requires load_property_name_info() and load_property_value_info()
        to have been called.
        """
        canonical_property_value_map = self.property_value_lookup_map[property_code]
        (prop_values, value_map) = parse_UCD_enumerated_property_map(
            property_code, filename_root + '.txt', canonical_property_value_map, default_value)
        for v in self.property_value_list[property_code]:
            if v not in prop_values:
                # A declared value with no code points is reported and given
                # an empty set rather than being treated as an error.
                print("Warning: property %s has no instance of value %s" % (self.full_name_map[property_code], v))
                value_map[v] = empty_uset()
        self._write_pablo_file(filename_root, property_code, value_map)

    def generate_ScriptExtensions_pablo(self):
        """Generate ScriptExtensions.pablo using property code 'scx'.

        The value names are resolved through the lookup map of the 'sc'
        property.  Requires load_property_value_info() to have been called.
        """
        (prop_values, value_map) = parse_ScriptExtensions_txt(self.property_value_lookup_map['sc'])
        self._write_pablo_file('ScriptExtensions', 'scx', value_map)
76
77
78
def UCD_main():
    """Load the property tables, then emit the Scripts, Script Extensions
    and General Category .pablo files, in that order."""
    generator = UCD_generator()

    # Property names and aliases must be loaded before the property values,
    # which are looked up through them.
    generator.load_property_name_info()
    generator.load_property_value_info()

    # Generation for the Block property ('Blocks', 'blk') is currently disabled.

    # Scripts
    generator.generate_property_file('Scripts', 'sc')

    # Script Extensions
    generator.generate_ScriptExtensions_pablo()

    # General Category
    generator.generate_property_file('extracted/DerivedGeneralCategory', 'gc')
100
if __name__ == "__main__":
    # Select the data directory before any table is parsed, then generate.
    set_UCD_dir('UCD/7.0.0')
    UCD_main()
Note: See TracBrowser for help on using the repository browser.