source: icGREP/icgrep-devel/icgrep/generate_predefined_ucd_functions.cpp @ 4671

Last change on this file since 4671 was 4671, checked in by nmedfort, 4 years ago

Moved responsibility of handling 'special cases of Unicode TR #18' and 'compatibility properties of UTR #18 Annex C' into RE_Parser.

File size: 9.8 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <cc/cc_compiler.h>
8#include <UCD/unicode_set.h>
9#include <UCD/PropertyObjectTable.h>
10#include <UCD/ucd_compiler.hpp>
11#include <pablo/pablo_compiler.h>
12#include <pablo/builder.hpp>
13#include <pablo/function.h>
14#include <llvm/Support/CommandLine.h>
15#include <utf_encoding.h>
16#include <pablo/optimizers/pablo_simplifier.hpp>
17#include <pablo/optimizers/pablo_codesinking.hpp>
18#ifdef ENABLE_MULTIPLEXING
19#include <pablo/optimizers/pablo_automultiplexing.hpp>
20#endif
21#include <llvm/IR/Verifier.h>
22#include <llvm/Support/Debug.h>
23#include <llvm/Support/TargetRegistry.h>
24#include <llvm/Support/TargetSelect.h>
25#include <llvm/Target/TargetLibraryInfo.h>
26#include <llvm/Target/TargetMachine.h>
27#include <llvm/Support/Host.h>
28#include <llvm/ADT/Triple.h>
29#include <llvm/Support/ToolOutputFile.h>
30#include <llvm/Pass.h>
31#include <llvm/PassManager.h>
32#include <llvm/ADT/STLExtras.h>
33#include <llvm/Target/TargetSubtargetInfo.h>
34#include <llvm/Support/FormattedStream.h>
35#include "llvm/Support/FileSystem.h"
36#include <llvm/Transforms/Scalar.h>
37#include <boost/algorithm/string/case_conv.hpp>
38#include <iostream>
39
40using namespace pablo;
41using namespace UCD;
42using namespace cc;
43using namespace llvm;
44
45inline std::string lowercase(const std::string & name) {
46    std::locale loc;
47    return boost::algorithm::to_lower_copy(name, loc);
48}
49
50static cl::opt<std::string>
51ObjectFilename("o", cl::desc("Output Object filename"), cl::value_desc("filename"));
52
53static cl::opt<std::string>
54PropertyFilename("p", cl::desc("Install Property filename"), cl::value_desc("filename"));
55
56#ifdef ENABLE_MULTIPLEXING
57static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(false),
58                                        cl::desc("combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."));
59#endif
60
61using property_list = std::vector<std::pair<std::string, size_t>>;
62
63/** ------------------------------------------------------------------------------------------------------------- *
64 * @brief compileUnicodeSet
65 ** ------------------------------------------------------------------------------------------------------------- */
66size_t compileUnicodeSet(std::string name, const UnicodeSet & set, PabloCompiler & pc, Module * module) {
67    PabloFunction function = PabloFunction::Create(std::move(name));
68    Encoding encoding(Encoding::Type::UTF_8, 8);
69    CC_Compiler ccCompiler(function, encoding);
70    UCDCompiler ucdCompiler(ccCompiler);
71    PabloBuilder builder(function.getEntryBlock());
72    // Build the unicode set function
73    ucdCompiler.generateWithDefaultIfHierarchy(set, builder);
74    // Optimize it at the pablo level
75    Simplifier::optimize(function);
76    CodeSinking::optimize(function);
77    #ifdef ENABLE_MULTIPLEXING
78    if (EnableMultiplexing) {
79        AutoMultiplexing::optimize(function);
80    }
81    #endif
82    // Now compile the function ...
83    auto func = pc.compile(function, module);
84    releaseSlabAllocatorMemory();
85
86    return func.second;
87}
88
89/** ------------------------------------------------------------------------------------------------------------- *
90 * @brief writePropertyInstaller
91 ** ------------------------------------------------------------------------------------------------------------- */
92
93void writePropertyInstaller(property_list && properties) {
94
95    #ifdef USE_LLVM_3_5
96    std::string error;
97    raw_fd_ostream out(PropertyFilename.c_str(), error, sys::fs::F_None);
98    if (!error.empty()) {
99        throw std::runtime_error(error);
100    }
101    #else
102    std::error_code error;
103    raw_fd_ostream out(PropertyFilename, error, sys::fs::F_None);
104    if (error) {
105        throw std::runtime_error(error.message());
106    }
107    #endif
108
109    out << "#ifndef PROPERTYINSTALL\n";
110    out << "#define PROPERTYINSTALL\n\n";
111    out << "#include <include/simd-lib/bitblock.hpp>\n";
112    out << "#include <pablo/pablo_compiler.h>\n\n";
113    out << "namespace UCD {\n\n";
114    out << "struct Input {\n    BitBlock bit[8];\n};\n\n";
115    out << "struct Output {\n    BitBlock bit[1];\n};\n\n";
116    for (auto prop : properties) {
117        out << "extern \"C\" void " + prop.first + "(const Input &, BitBlock *, Output &);\n";
118    }
119    out << "\nvoid install_properties(pablo::PabloCompiler & p) {\n";
120    for (auto prop : properties) {
121        out << "    p.InstallExternalFunction(\"" + prop.first + "\", reinterpret_cast<void *>(&" + prop.first + "), " + std::to_string(prop.second) + ");\n";
122    }
123    out << "}\n}\n\n#endif\n";
124    out.close();
125}
126
127
128/** ------------------------------------------------------------------------------------------------------------- *
129 * @brief generateUCDModule
130 ** ------------------------------------------------------------------------------------------------------------- */
131Module * generateUCDModule() {
132
133    property_list properties;
134
135    PabloCompiler pc;
136    Module * module = new Module("ucd", getGlobalContext());
137    for (PropertyObject * obj : property_object_table) {
138        if (EnumeratedPropertyObject * enumObj = dyn_cast<EnumeratedPropertyObject>(obj)) {
139            for (const std::string value : *enumObj) {
140                const UnicodeSet & set = enumObj->GetCodepointSet(canonicalize_value_name(value));
141                std::string name = "__get_" + property_enum_name[enumObj->getPropertyCode()] + "_" + value;
142                properties.emplace_back(name, compileUnicodeSet(name, set, pc, module));
143            }
144        }
145        else if (ExtensionPropertyObject * extObj = dyn_cast<ExtensionPropertyObject>(obj)) {
146            for (const std::string value : *extObj) {
147                const UnicodeSet & set = extObj->GetCodepointSet(canonicalize_value_name(value));
148                std::string name = "__get_" + property_enum_name[extObj->getPropertyCode()] + "_" + value;
149                properties.emplace_back(name, compileUnicodeSet(name, set, pc, module));
150            }
151        }
152        else if (BinaryPropertyObject * binObj = dyn_cast<BinaryPropertyObject>(obj)) {
153            const UnicodeSet & set = binObj->GetCodepointSet(Binary_ns::Y);
154            std::string name = "__get_" + property_enum_name[binObj->getPropertyCode()] + "_Y";
155            properties.emplace_back(name, compileUnicodeSet(name, set, pc, module));
156        }
157    }
158
159    // Print an error message if our module is malformed in any way.
160    verifyModule(*module, &dbgs());
161
162    writePropertyInstaller(std::move(properties));
163
164    return module;
165}
166
167/** ------------------------------------------------------------------------------------------------------------- *
168 * @brief compileUCDModule
169 ** ------------------------------------------------------------------------------------------------------------- */
170void compileUCDModule(Module * module) {
171    Triple TheTriple;
172
173    // Initialize targets first, so that --version shows registered targets.
174    InitializeAllTargets();
175    InitializeAllTargetMCs();
176    InitializeAllAsmPrinters();
177    InitializeAllAsmParsers();
178
179    TheTriple.setTriple(sys::getDefaultTargetTriple());
180
181    // Get the target specific parser.
182    std::string msg;
183    const Target * TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), msg);
184    if (TheTarget == nullptr) {
185        throw std::runtime_error(msg);
186    }
187
188    auto MCPU = llvm::sys::getHostCPUName();
189
190    TargetOptions Options;
191
192    std::unique_ptr<TargetMachine> Target(
193                TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, "", Options,
194                                               Reloc::Default, CodeModel::Small, CodeGenOpt::Aggressive));
195
196    if (Target == nullptr) {
197        throw std::runtime_error("Could not allocate target machine!");
198    }
199
200    #ifdef USE_LLVM_3_5
201    std::string error;
202    std::unique_ptr<tool_output_file> Out = make_unique<tool_output_file>(ObjectFilename.c_str(), error, sys::fs::F_None);
203    if (!error.empty()) {
204        throw std::runtime_error(error);
205    }
206    #else
207    std::error_code error;
208    std::unique_ptr<tool_output_file> Out = make_unique<tool_output_file>(ObjectFilename, error, sys::fs::F_None);
209    if (error) {
210        throw std::runtime_error(error.message());
211    }
212    #endif
213
214    // Build up all of the passes that we want to do to the module.
215    PassManager PM;
216
217    // Add an appropriate TargetLibraryInfo pass for the module's triple.
218    PM.add(new TargetLibraryInfo(TheTriple));
219
220    // Add the target data from the target machine, if it exists, or the module.
221    #ifdef USE_LLVM_3_5
222    const DataLayout * DL = Target->getDataLayout();
223    #else
224    const DataLayout * DL = Target->getSubtargetImpl()->getDataLayout();
225    #endif
226    if (DL) {
227        module->setDataLayout(DL);
228    }
229    #ifdef USE_LLVM_3_5
230    PM.add(new DataLayoutPass(module));
231    #else
232    PM.add(new DataLayoutPass());
233    #endif
234    PM.add(createReassociatePass());
235    PM.add(createInstructionCombiningPass());
236    PM.add(createSinkingPass());
237
238    formatted_raw_ostream FOS(Out->os());
239    // Ask the target to add backend passes as necessary.
240    if (Target->addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_ObjectFile)) {
241        throw std::runtime_error("Target does not support generation of object file type!\n");
242    }
243
244    PM.run(*module);
245
246    Out->keep();
247}
248
249/** ------------------------------------------------------------------------------------------------------------- *
250 * @brief main
251 ** ------------------------------------------------------------------------------------------------------------- */
252int main(int argc, char *argv[]) {
253    cl::ParseCommandLineOptions(argc, argv, "UCD Compiler\n");
254    if (PropertyFilename.empty()) {
255        PropertyFilename = "PropertyInstall.h";
256    }
257    if (ObjectFilename.empty()) {
258        ObjectFilename = "pregenerated_properties.o";
259    }
260    Module * module = generateUCDModule();
261    compileUCDModule(module);
262    return 0;
263}
Note: See TracBrowser for help on using the repository browser.