source: proto/charsetcompiler/generate_UCD_property_functions.py

Last change on this file was 4675, checked in by nmedfort, 4 years ago

Modification to pass output value by ref and accept a dummy carry pointer.

File size: 7.5 KB
Line 
1#
2# generate_UCD_property_functions.py -
3# generating Python pablo functions for various Unicode properties
4#
5# Robert D. Cameron
6# January 2, 2015
7#
8# Licensed under Open Software License 3.0.
9#
10#
11import re, string, os.path, UCD.cformat
12from UCD.unicode_set import *
13from UCD.UCD_parser import *
14from if_hierarchy import *
15from string import Template
16
# C++ text emitted once per (property, value) pair: a global
# Property_<property>_<value> object plus an extern "C" wrapper
# __get_<property>_<value> that forwards to the object's do_block().
# The wrapper accepts a BitBlock* carry-frame pointer but never uses it.
Unicode_property_fn_template = Template(r"""
Property_${property}_${value} f${property}_${value};

extern "C" {
    void __get_${property}_${value} (Basis_bits & basis_bits, BitBlock * /* carryFramePtr */, Struct_${property}_${value} & output) {
        f${property}_${value} . do_block(basis_bits, output);
    }
}
""")

# Preamble written near the top of every generated precompiled_*.cpp file.
# NOTE(review): "@global" looks like a placeholder expanded by a later
# pablo compilation step -- confirm against the build scripts.
Unicode_pablo_template = r"""

#include "icgrep.h"

#define LocalCarryDeclare(name, count)\
CarryArray<count, 0> name;\


@global


"""

# Pablo source: output struct declaration with a single `cc` field.
Unicode_CC_struct = Template("class Struct_${property}_${value}:\n\tcc = 0\n\n")

# Pablo source: `def` line opening the function for one property value.
Unicode_CC_header = Template("def property_${property}_${value}(basis_bits, struct_${property}_${value}):\n")

# Pablo source: empty Main appended at the end of each generated .pablo file.
Unicode_dummy_main = "\n\ndef Main(basis_bits):\n    pass\n"

# Only ${property} is a Template placeholder here; the literal "%s" survives
# substitute() and is handed on to generateCharClassDefs as its third argument.
# NOTE(review): presumably generateCharClassDefs fills "%s" with the value
# name -- confirm in if_hierarchy.
Unicode_template_var = Template("struct_${property}_%s.cc")

# %-format string: declaration of the installer function (one %s: property name).
install_fn_header = r"""
void install_property_%s_fn_ptrs(pablo::PabloCompiler & p);
"""
# %-format string: definition of the installer function
# (first %s: property name, second %s: the joined install statements).
install_fn = r"""
void install_property_%s_fn_ptrs(pablo::PabloCompiler & p) {
  %s
}
"""
56
class UCD_generator():
    """Generate pablo sources and C++ install stubs for Unicode properties.

    Typical use: call load_property_name_info(), then
    load_property_value_info(), then any of the generate_* methods.
    The generate_* methods read the alias tables stored on the instance
    by the two load_* methods.
    """

    # NOTE(review): `cformat` is referenced unqualified below although the
    # file imports it as `UCD.cformat`; presumably one of the star imports
    # brings the bare name into scope -- confirm.

    def __init__(self):
        pass

    def load_property_name_info(self):
        """Parse the property alias data and store the resulting tables on self."""
        (self.property_enum_name_list, self.full_name_map, self.property_lookup_map, self.property_kind_map) = parse_PropertyAlias_txt()

    def load_property_value_info(self):
        """Parse the property value alias data and store the tables on self.

        Requires load_property_name_info() to have been called first,
        because self.property_lookup_map is passed to the parser.
        """
        (self.property_value_list, self.property_value_enum_integer, self.property_value_full_name_map, self.property_value_lookup_map, self.missing_specs) = parse_PropertyValueAlias_txt(self.property_lookup_map)

    def _write_pablo_file(self, filename_root, entries):
        """Write <basename(filename_root)>.pablo into the current directory.

        `entries` is an iterable of (property, value, uset) tuples.  For each
        one, a struct declaration, a function header and the character class
        definitions built from `uset` are written, in that order; the dummy
        Main is appended after the last entry.
        """
        basename = os.path.basename(filename_root)
        # The context manager closes the file even when an entry fails to
        # generate.  Mode "wb" is kept unchanged from the original code.
        with open(basename + ".pablo", "wb") as f:
            for (prop, value, uset) in entries:
                parms = {'property' : prop, 'value' : value}
                f.write(Unicode_CC_struct.substitute(parms))
                f.write(Unicode_CC_header.substitute(parms))
                template_var = Unicode_template_var.substitute(parms)
                f.write(generateCharClassDefs(defaultIfRangeList, {value: uset_to_range_list(uset)},  template_var))
            f.write(Unicode_dummy_main)

    def generate_enumerated_property_header_and_template(self, filename_root, property_code):
        """Write the precompiled_<property_code> header and C++ file.

        The header declares install_property_<code>_fn_ptrs; the C++ file
        holds one wrapper per property value plus the installer definition
        (<code> is property_code lowercased).  `filename_root` is accepted
        for interface symmetry with the other generators and is not used.
        """
        out_name = os.path.basename("precompiled_%s" % property_code)
        pc = property_code.lower()

        hf = cformat.open_header_file_for_write(out_name, 'generate_UCD_property_functions.py')
        cformat.write_imports(hf, ['"pablo/pablo_compiler.h"'])
        hf.write(install_fn_header % pc)
        cformat.close_header_file(hf)

        tf = cformat.open_cpp_file_for_write(out_name, 'generate_UCD_property_functions.py')
        cformat.write_imports(tf, ['"icgrep.h"'])
        tf.write(Unicode_pablo_template)
        vlist = self.property_value_list[property_code]
        for v in vlist:
            tf.write(Unicode_property_fn_template.substitute({'property': pc, 'value': v}))
        # Two-stage formatting: the property code is filled in now, while the
        # escaped %%s slots are left for each value name below.
        install_stmt = "  p.InstallExternalFunction(\"__get_%s_%%s\", (void *) & __get_%s_%%s);\n" % (pc, pc)
        install_list = [install_stmt % (k, k) for k in vlist]
        tf.write(install_fn % (pc, "".join(install_list)))
        cformat.close_cpp_file(tf)

    def generate_enumerated_property_file(self, filename_root, property_code):
        """Write the .pablo file for an enumerated property.

        Property data is parsed from filename_root + '.txt'; output goes to
        <basename(filename_root)>.pablo with one struct/function pair per
        value of `property_code`.
        """
        vlist = self.property_value_list[property_code]
        canon_map = self.property_value_lookup_map[property_code]
        (_, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt')
        pc = property_code.lower()
        self._write_pablo_file(filename_root, ((pc, k, value_map[k]) for k in vlist))

    def generate_ScriptExtensions_pablo(self):
        """Write ScriptExtensions.pablo, using the 'sc' value list under property name 'scx'."""
        vlist = self.property_value_list['sc']
        (_, value_map) = parse_ScriptExtensions_txt(vlist, self.property_value_lookup_map['sc'])
        self._write_pablo_file('ScriptExtensions', (('scx', k, value_map[k]) for k in vlist))

    def generate_binary_property_header_and_template(self, filename_root):
        """Write the precompiled_<filename_root lowercased> header and C++ file.

        Property names are parsed from filename_root + '.txt'.  One wrapper
        with value 'Y' is emitted per lowercased property name.  Note that
        the output file names use the lowercased root, whereas the installer
        function name uses `filename_root` exactly as given.
        """
        (props, _) = parse_UCD_codepoint_name_map(filename_root + '.txt', self.property_lookup_map)
        lprops = [p.lower() for p in props]
        out_name = os.path.basename("precompiled_%s" % filename_root.lower())

        hf = cformat.open_header_file_for_write(out_name, 'generate_UCD_property_functions.py')
        cformat.write_imports(hf, ['"pablo/pablo_compiler.h"'])
        hf.write(install_fn_header % filename_root)
        cformat.close_header_file(hf)

        tf = cformat.open_cpp_file_for_write(out_name, 'generate_UCD_property_functions.py')
        cformat.write_imports(tf, ['"icgrep.h"'])
        tf.write(Unicode_pablo_template)
        for p in lprops:
            tf.write(Unicode_property_fn_template.substitute({'property': p, 'value': 'Y'}))
        install_stmt = "  p.InstallExternalFunction(\"__get_%s_Y\", (void *) & __get_%s_Y);\n"
        install_list = [install_stmt % (p, p) for p in lprops]
        tf.write(install_fn % (filename_root, "".join(install_list)))
        cformat.close_cpp_file(tf)

    def generate_binary_properties_file(self, filename_root):
        """Write the .pablo file for the binary properties in filename_root + '.txt'.

        Each property gets a struct/function pair named with the lowercased
        property name and the fixed value 'Y'.
        """
        (props, prop_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', self.property_lookup_map)
        self._write_pablo_file(filename_root, ((p.lower(), 'Y', prop_map[p]) for p in props))
143
144
def UCD_main():
    """Run the complete generation pass for every supported Unicode property.

    Loads the property name and value alias tables, then emits the
    header/C++ stubs and the .pablo source for each enumerated property
    and each binary property file, in a fixed order.
    """
    generator = UCD_generator()

    # Names must be parsed before values: the value parser is handed the
    # lookup map produced by the name parser.
    generator.load_property_name_info()
    generator.load_property_value_info()

    # (header/template root, property code, data file root).
    # A data root of None marks Script Extensions, which has its own
    # dedicated .pablo generator.
    enumerated_jobs = (
        ('Blocks', 'blk', 'Blocks'),
        ('Scripts', 'sc', 'Scripts'),
        ('ScriptExtensions', 'scx', None),
        ('GeneralCategory', 'gc', 'extracted/DerivedGeneralCategory'),
    )
    for (header_root, code, data_root) in enumerated_jobs:
        generator.generate_enumerated_property_header_and_template(header_root, code)
        if data_root is None:
            generator.generate_ScriptExtensions_pablo()
        else:
            generator.generate_enumerated_property_file(data_root, code)

    # Binary properties: core properties first, then PropList.
    for binary_root in ('DerivedCoreProperties', 'PropList'):
        generator.generate_binary_property_header_and_template(binary_root)
        generator.generate_binary_properties_file(binary_root)

# Script entry point: runs the full generation only when executed directly.
# NOTE(review): set_UCD_dir presumably tells the UCD parser where the
# Unicode 8.0.0 data files live (relative path 'UCD/8.0.0') -- confirm.
if __name__ == "__main__":
    set_UCD_dir('UCD/8.0.0')
    UCD_main()
Note: See TracBrowser for help on using the repository browser.