source: proto/charsetcompiler/generate_UCD_property_functions.py @ 4379

Last change on this file since 4379 was 4379, checked in by cameron, 4 years ago

Incremental step towards general property support

File size: 4.6 KB
Line 
1#
2# generate_UCD_property_functions.py -
3# generating Python pablo functions for various Unicode properties
4#
5# Robert D. Cameron
6# December 28, 2014
7#
8# Licensed under Open Software License 3.0.
9#
10#
11import re, string, os.path, UCD.cformat
12from UCD.unicode_set import *
13from UCD.UCD_parser import *
14from if_hierarchy import *
15from string import Template
16
# C++ source emitted once per (property, value) pair by
# UCD_generator.generate_enumerated_property_template.  For each pair it
# declares a lazily-allocated Property_<property>_<value> object and an
# extern "C" entry point __get_<property>_<value> that creates the object on
# first use, runs do_block on the supplied basis bits and returns the `cc`
# field of the output struct.
# Placeholders: ${property} (property code) and ${value} (property value name).
Unicode_property_fn_template = Template(r"""
Property_${property}_${value} * f${property}_${value} = nullptr;

extern "C" {
    BitBlock __get_${property}_${value} (Basis_bits & basis_bits) {
        if (f${property}_${value} == nullptr) f${property}_${value} = new Property_${property}_${value}();
        Struct_${property}_${value} output;
        f${property}_${value} -> do_block(basis_bits, output);
        return output.cc;
    }
}
""")
29
# Fixed preamble written at the start of every generated header, before the
# per-value functions.  This is a plain raw string (not a string.Template), so
# it is written verbatim: the trailing backslashes are C preprocessor line
# continuations and must be preserved exactly.
# NOTE(review): `@global` looks like a placeholder expanded by a later tool in
# the build (presumably the pablo compiler) -- confirm.
Unicode_pablo_template = r"""

#include "icgrep.h"

#define LocalCarryDeclare(name, count)\
CarryArray<count, 0> name;\


@global


"""
42
# Output struct written ahead of each generated property function; it has a
# single field `cc` initialised to 0 (the body line is indented with a tab).
Unicode_CC_struct = Template(
    "class Struct_${property}_${value}:\n"
    "\tcc = 0\n"
    "\n"
)

# `def` line of the generated property function; the function body is produced
# separately by generateCharClassDefs.
Unicode_CC_header = Template(
    "def property_${property}_${value}"
    "(basis_bits, struct_${property}_${value}):\n"
)

# Empty Main function appended once at the end of every generated .pablo file.
Unicode_dummy_main = (
    "\n"
    "\n"
    "def Main(basis_bits):\n"
    "    pass\n"
)

# Two-stage template: ${property} is filled in here, while the remaining `%s`
# is left in the result that is handed to generateCharClassDefs.
Unicode_template_var = Template("struct_${property}_" "%s.cc")
50
51
class UCD_generator():
    """Generate pablo sources and C++ headers for Unicode enumerated properties.

    Typical use: call load_property_name_info(), then
    load_property_value_info(), then any of the generate_* methods.  The
    generate_* methods read the tables that the load_* methods store on the
    instance.
    """

    def __init__(self):
        pass

    def load_property_name_info(self):
        """Parse the property alias data and store the four resulting tables.

        Sets self.property_enum_name_list, self.full_name_map,
        self.property_lookup_map and self.property_kind_map from the tuple
        returned by parse_PropertyAlias_txt().
        """
        (self.property_enum_name_list,
         self.full_name_map,
         self.property_lookup_map,
         self.property_kind_map) = parse_PropertyAlias_txt()

    def load_property_value_info(self):
        """Parse the property value alias data and store the resulting tables.

        Requires load_property_name_info() to have been called first, because
        self.property_lookup_map is passed to the parser.  Sets
        self.property_value_list, self.property_value_enum_integer,
        self.property_value_full_name_map, self.property_value_lookup_map and
        self.missing_specs.
        """
        (self.property_value_list,
         self.property_value_enum_integer,
         self.property_value_full_name_map,
         self.property_value_lookup_map,
         self.missing_specs) = parse_PropertyValueAlias_txt(self.property_lookup_map)

    def generate_enumerated_property_template(self, filename_root, property_code):
        """Write the header file holding one C++ accessor per property value.

        filename_root -- accepted for symmetry with
                         generate_enumerated_property_file but not used; the
                         header is named after property_code.
        property_code -- key into self.property_value_list (e.g. 'sc', 'gc').

        Raises KeyError if property_code has no entry in
        self.property_value_list.
        """
        # Look the values up before creating the file so that an unknown
        # property code does not leave a half-written header behind.
        vlist = self.property_value_list[property_code]
        lowered = property_code.lower()
        f = cformat.open_header_file_for_write(os.path.basename(property_code))
        try:
            f.write(Unicode_pablo_template)
            for v in vlist:
                f.write(Unicode_property_fn_template.substitute({'property': lowered, 'value': v}))
        finally:
            # Always release the file handle, even if a write fails.
            cformat.close_header_file(f)

    def _write_pablo_file(self, basename, property_name, value_map):
        """Write <basename>.pablo with one struct + function per key of value_map.

        basename      -- output file name without the '.pablo' suffix.
        property_name -- text substituted for ${property} in the templates.
        value_map     -- mapping from property value name to its Unicode set;
                         each set is converted with uset_to_range_list.
        """
        # NOTE(review): 'wb' combined with str writes implies Python 2; under
        # Python 3 this would need text mode -- confirm the target interpreter.
        f = open(basename + ".pablo", "wb")
        try:
            for k in value_map.keys():
                parms = {'property': property_name, 'value': k}
                f.write(Unicode_CC_struct.substitute(parms))
                f.write(Unicode_CC_header.substitute(parms))
                template_var = Unicode_template_var.substitute(parms)
                f.write(generateCharClassDefs(defaultIfRangeList, {k: uset_to_range_list(value_map[k])}, template_var))
            f.write(Unicode_dummy_main)
        finally:
            # NOTE(review): the file comes from plain open() but is closed via
            # cformat.close_header_file, exactly as before this refactoring;
            # confirm that helper is appropriate for non-header output.
            cformat.close_header_file(f)

    def generate_enumerated_property_file(self, filename_root, property_code, default_value = None):
        """Generate the .pablo file for one enumerated property.

        filename_root -- UCD data file path without the '.txt' suffix; its
                         basename also names the output '.pablo' file, which
                         is created in the current working directory.
        property_code -- key into self.property_value_list and
                         self.property_value_lookup_map.
        default_value -- forwarded unchanged to
                         parse_UCD_enumerated_property_map.
        """
        vlist = self.property_value_list[property_code]
        canon_map = self.property_value_lookup_map[property_code]
        (_, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt', default_value)
        self._write_pablo_file(os.path.basename(filename_root), property_code.lower(), value_map)

    def generate_ScriptExtensions_pablo(self):
        """Generate ScriptExtensions.pablo for the 'scx' property.

        The value names and lookup map of the 'sc' (Script) property are
        passed to parse_ScriptExtensions_txt, so load_property_value_info()
        must have been called first.
        """
        (_, value_map) = parse_ScriptExtensions_txt(self.property_value_list['sc'], self.property_value_lookup_map['sc'])
        self._write_pablo_file('ScriptExtensions', 'scx', value_map)
99
100
101
def UCD_main():
    """Load the UCD name tables, then generate all currently enabled outputs.

    Enabled outputs, in order: Scripts (header template and pablo file),
    Script Extensions (pablo file only) and General Category (header template
    and pablo file).  Generation for the Block property ('Blocks', 'blk') and
    the header template for Script Extensions ('ScriptExtensions', 'scx') are
    deliberately not invoked at present.
    """
    generator = UCD_generator()

    # Property names and aliases must be loaded before property values,
    # because the value loader consumes the property lookup map.
    generator.load_property_name_info()
    generator.load_property_value_info()

    # Scripts
    generator.generate_enumerated_property_template('Scripts', 'sc')
    generator.generate_enumerated_property_file('Scripts', 'sc')

    # Script Extensions (no header template is generated for 'scx')
    generator.generate_ScriptExtensions_pablo()

    # General Category
    generator.generate_enumerated_property_template('GeneralCategory', 'gc')
    generator.generate_enumerated_property_file('extracted/DerivedGeneralCategory', 'gc')
126
if __name__ == "__main__":
    # Script entry point: set the UCD directory first, then run every
    # generator step.
    set_UCD_dir('UCD/7.0.0')
    UCD_main()
Note: See TracBrowser for help on using the repository browser.