source: proto/charsetcompiler/generate_UCD_property_functions.py @ 4637

Last change on this file since 4637 was 4637, checked in by cameron, 4 years ago

Update to use Unicode 8.0.0 database files

File size: 7.5 KB
Line 
1#
2# generate_UCD_property_functions.py -
3# generating Python pablo functions for various Unicode properties
4#
5# Robert D. Cameron
6# January 2, 2015
7#
8# Licensed under Open Software License 3.0.
9#
10#
11import re, string, os.path, UCD.cformat
12from UCD.unicode_set import *
13from UCD.UCD_parser import *
14from if_hierarchy import *
15from string import Template
16
# C++ text emitted once per (property, value) pair: declares an object
# f<property>_<value> of type Property_<property>_<value> and an extern "C"
# wrapper __get_<property>_<value> that runs do_block on the basis bits and
# returns the resulting `cc` field.
Unicode_property_fn_template = Template(r"""
Property_${property}_${value} f${property}_${value};

extern "C" {
    BitBlock __get_${property}_${value} (Basis_bits & basis_bits) {
        Struct_${property}_${value} output;
        f${property}_${value} . do_block(basis_bits, output);
        return output.cc;
    }
}
""")

# Fixed preamble written near the top of each generated C++ file: includes
# icgrep.h and defines the LocalCarryDeclare macro.  This is a raw string, so
# the trailing backslashes are emitted literally as C preprocessor line
# continuations.
# NOTE(review): `@global` looks like a placeholder expanded by a later
# tool -- confirm.
Unicode_pablo_template = r"""

#include "icgrep.h"

#define LocalCarryDeclare(name, count)\
CarryArray<count, 0> name;\


@global


"""

# Python-syntax class definition written to the .pablo file for each
# (property, value) pair; the class has a single attribute `cc` set to 0.
Unicode_CC_struct = Template("class Struct_${property}_${value}:\n\tcc = 0\n\n")

# `def` line that opens the per-(property, value) function in the .pablo file.
Unicode_CC_header = Template("def property_${property}_${value}(basis_bits, struct_${property}_${value}):\n")

# Empty Main function appended at the end of every generated .pablo file.
Unicode_dummy_main = "\n\ndef Main(basis_bits):\n    pass\n"

# Only ${property} is substituted here; the literal %s survives substitution.
# NOTE(review): the %s is presumably filled in later by generateCharClassDefs
# with the value name -- confirm.
Unicode_template_var = Template("struct_${property}_%s.cc")

# %-format string (one argument: the name used in the function identifier)
# for the prototype written to the generated header.
install_fn_header = r"""
void install_property_%s_fn_ptrs(pablo::PabloCompiler & p);
"""
# %-format string (two arguments: the name used in the function identifier,
# then the already-joined body statements) for the definition written to the
# generated C++ file.
install_fn = r"""
void install_property_%s_fn_ptrs(pablo::PabloCompiler & p) {
  %s
}
"""
58
class UCD_generator():
    """Driver that turns parsed UCD property data into generated sources.

    The ``generate_*_header_and_template`` methods emit a ``precompiled_*``
    header / C++ pair through ``cformat``.  The ``generate_*_file`` and
    ``generate_ScriptExtensions_pablo`` methods write ``<name>.pablo`` using a
    path relative to the current working directory.

    ``load_property_name_info`` and then ``load_property_value_info`` must be
    called before any generator method, because the generators read the maps
    those loaders store on ``self``.
    """

    def __init__(self):
        pass

    def load_property_name_info(self):
        """Store the four results of ``parse_PropertyAlias_txt()`` on ``self``.

        Sets ``property_enum_name_list``, ``full_name_map``,
        ``property_lookup_map`` and ``property_kind_map``.
        """
        (self.property_enum_name_list, self.full_name_map, self.property_lookup_map, self.property_kind_map) = parse_PropertyAlias_txt()

    def load_property_value_info(self):
        """Store the five results of ``parse_PropertyValueAlias_txt`` on ``self``.

        Requires ``self.property_lookup_map`` (set by
        ``load_property_name_info``).  Sets ``property_value_list``,
        ``property_value_enum_integer``, ``property_value_full_name_map``,
        ``property_value_lookup_map`` and ``missing_specs``.
        """
        (self.property_value_list, self.property_value_enum_integer, self.property_value_full_name_map, self.property_value_lookup_map, self.missing_specs) = parse_PropertyValueAlias_txt(self.property_lookup_map)

    def _write_pablo_file(self, basename, entries):
        """Write ``<basename>.pablo`` with one struct/function pair per entry.

        basename -- output file name without the ``.pablo`` extension.
        entries  -- iterable of ``(property, value, unicode_set)`` triples;
                    ``property`` and ``value`` are substituted into the
                    templates and ``unicode_set`` is converted with
                    ``uset_to_range_list`` before code generation.

        The file is opened in a ``with`` block so the handle is closed even
        if parsing or code generation raises part-way through.
        """
        # NOTE(review): the payloads are str objects written to a binary-mode
        # file, which presumably relies on Python 2 semantics (Python 3 would
        # raise TypeError here) -- confirm the target interpreter before
        # changing the mode.
        with open(basename + ".pablo", "wb") as f:
            for (prop, value, uset) in entries:
                parms = {'property': prop, 'value': value}
                f.write(Unicode_CC_struct.substitute(parms))
                f.write(Unicode_CC_header.substitute(parms))
                template_var = Unicode_template_var.substitute(parms)
                f.write(generateCharClassDefs(defaultIfRangeList, {value: uset_to_range_list(uset)}, template_var))
            f.write(Unicode_dummy_main)

    def generate_enumerated_property_header_and_template(self, filename_root, property_code):
        """Emit the ``precompiled_<property_code>`` header and C++ files.

        filename_root -- accepted for symmetry with the other generators; it
                         is not used by this method.
        property_code -- key into ``self.property_value_list``; its lowercase
                         form is used inside the generated identifiers.
        """
        generator_name = 'generate_UCD_property_functions.py'
        out_name = os.path.basename("precompiled_%s" % property_code)
        pc = property_code.lower()

        # Header: just the prototype of the install function.
        hf = cformat.open_header_file_for_write(out_name, generator_name)
        cformat.write_imports(hf, ['"pablo/pablo_compiler.h"'])
        hf.write(install_fn_header % pc)
        cformat.close_header_file(hf)

        # C++ file: preamble, one wrapper per property value, then the
        # install function that registers every wrapper.
        tf = cformat.open_cpp_file_for_write(out_name, generator_name)
        cformat.write_imports(tf, ['"icgrep.h"'])
        tf.write(Unicode_pablo_template)
        vlist = self.property_value_list[property_code]
        for v in vlist:
            tf.write(Unicode_property_fn_template.substitute({'property': pc, 'value': v}))
        # Two-stage formatting: the property code is filled in now, and the
        # doubled %% leaves a %s slot for each value name.
        install_stmt = "  p.InstallExternalFunction(\"__get_%s_%%s\", (void *) & __get_%s_%%s);\n" % (pc, pc)
        install_list = [install_stmt % (k, k) for k in vlist]
        tf.write(install_fn % (pc, "".join(install_list)))
        cformat.close_cpp_file(tf)

    def generate_enumerated_property_file(self, filename_root, property_code):
        """Write ``<basename(filename_root)>.pablo`` for an enumerated property.

        filename_root -- UCD data file path without the ``.txt`` extension.
        property_code -- key into ``self.property_value_list`` and
                         ``self.property_value_lookup_map``.
        """
        vlist = self.property_value_list[property_code]
        canon_map = self.property_value_lookup_map[property_code]
        (_, value_map) = parse_UCD_enumerated_property_map(property_code, vlist, canon_map, filename_root + '.txt')
        pc = property_code.lower()
        self._write_pablo_file(os.path.basename(filename_root),
                               ((pc, k, value_map[k]) for k in vlist))

    def generate_ScriptExtensions_pablo(self):
        """Write ``ScriptExtensions.pablo`` for the ``scx`` property.

        The value list and lookup map of the ``sc`` property are reused.
        """
        vlist = self.property_value_list['sc']
        (_, value_map) = parse_ScriptExtensions_txt(vlist, self.property_value_lookup_map['sc'])
        self._write_pablo_file('ScriptExtensions',
                               (('scx', k, value_map[k]) for k in vlist))

    def generate_binary_property_header_and_template(self, filename_root):
        """Emit the ``precompiled_<filename_root.lower()>`` header and C++ files.

        filename_root -- UCD data file path without the ``.txt`` extension.
                         It is used as given (not lowercased) in the name of
                         the generated install function.
        """
        generator_name = 'generate_UCD_property_functions.py'
        (props, _) = parse_UCD_codepoint_name_map(filename_root + '.txt', self.property_lookup_map)
        lprops = [p.lower() for p in props]
        out_name = os.path.basename("precompiled_%s" % filename_root.lower())

        hf = cformat.open_header_file_for_write(out_name, generator_name)
        cformat.write_imports(hf, ['"pablo/pablo_compiler.h"'])
        hf.write(install_fn_header % filename_root)
        cformat.close_header_file(hf)

        tf = cformat.open_cpp_file_for_write(out_name, generator_name)
        cformat.write_imports(tf, ['"icgrep.h"'])
        tf.write(Unicode_pablo_template)
        # Binary properties only get a wrapper for the value 'Y'.
        for p in lprops:
            tf.write(Unicode_property_fn_template.substitute({'property': p, 'value': 'Y'}))
        install_stmt = "  p.InstallExternalFunction(\"__get_%s_Y\", (void *) & __get_%s_Y);\n"
        install_list = [install_stmt % (p, p) for p in lprops]
        tf.write(install_fn % (filename_root, "".join(install_list)))
        cformat.close_cpp_file(tf)

    def generate_binary_properties_file(self, filename_root):
        """Write ``<basename(filename_root)>.pablo`` for a binary-property file.

        filename_root -- UCD data file path without the ``.txt`` extension.
        Each property found in the file gets one struct/function pair with
        value ``'Y'``.
        """
        (props, prop_map) = parse_UCD_codepoint_name_map(filename_root + '.txt', self.property_lookup_map)
        self._write_pablo_file(os.path.basename(filename_root),
                               ((p.lower(), 'Y', prop_map[p]) for p in props))
146
147
def UCD_main():
    """Run every generator in sequence for the supported Unicode properties.

    Property names are loaded first and property values second, because
    loading the values reads the lookup map produced by the name loader.
    After that, each property gets its header/C++ pair followed by its
    .pablo file.
    """
    generator = UCD_generator()

    # Names and aliases of properties, then of property values.
    generator.load_property_name_info()
    generator.load_property_value_info()

    # Block and Script: the data file root and output root coincide.
    for (root, code) in (('Blocks', 'blk'), ('Scripts', 'sc')):
        generator.generate_enumerated_property_header_and_template(root, code)
        generator.generate_enumerated_property_file(root, code)

    # Script Extensions has its own .pablo writer.
    generator.generate_enumerated_property_header_and_template('ScriptExtensions', 'scx')
    generator.generate_ScriptExtensions_pablo()

    # General Category reads its data from the extracted/ subdirectory.
    generator.generate_enumerated_property_header_and_template('GeneralCategory', 'gc')
    generator.generate_enumerated_property_file('extracted/DerivedGeneralCategory', 'gc')

    # Binary property files: core properties, then PropList.
    for root in ('DerivedCoreProperties', 'PropList'):
        generator.generate_binary_property_header_and_template(root)
        generator.generate_binary_properties_file(root)
179
if __name__ == "__main__":
    # Script entry point: select the UCD data directory (a relative path)
    # before running the generators.
    set_UCD_dir('UCD/8.0.0')
    UCD_main()
Note: See TracBrowser for help on using the repository browser.