source: icGREP/icgrep-devel/icgrep/generate_predefined_ucd_functions.cpp @ 4666

Last change on this file since 4666 was 4666, checked in by nmedfort, 4 years ago

UCD generator working but not yet incorporated into icGrep.

File size: 7.5 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <cc/cc_compiler.h>
8#include <UCD/unicode_set.h>
9#include <UCD/PropertyObjectTable.h>
10#include <UCD/ucd_compiler.hpp>
11#include <pablo/pablo_compiler.h>
12#include <pablo/builder.hpp>
13#include <pablo/function.h>
14#include <llvm/Support/CommandLine.h>
15#include <utf_encoding.h>
16#include <pablo/optimizers/pablo_simplifier.hpp>
17#include <pablo/optimizers/pablo_codesinking.hpp>
18#ifdef ENABLE_MULTIPLEXING
19#include <pablo/optimizers/pablo_automultiplexing.hpp>
20#endif
21#include <llvm/IR/Verifier.h>
22#include <llvm/Support/Debug.h>
23#include <llvm/Support/TargetRegistry.h>
24#include <llvm/Support/TargetSelect.h>
25#include <llvm/Target/TargetLibraryInfo.h>
26#include <llvm/Target/TargetMachine.h>
27#include <llvm/Support/Host.h>
28#include <llvm/ADT/Triple.h>
29#include <llvm/Support/ToolOutputFile.h>
30#include <llvm/Pass.h>
31#include <llvm/PassManager.h>
32#include <llvm/ADT/STLExtras.h>
33#include <llvm/Target/TargetSubtargetInfo.h>
34#include <llvm/Support/FormattedStream.h>
35#include "llvm/Support/FileSystem.h"
36#include <llvm/Transforms/Scalar.h>
37
38
39#include <boost/algorithm/string/case_conv.hpp>
40#include <iostream>
41
42using namespace pablo;
43using namespace UCD;
44using namespace cc;
45using namespace llvm;
46
/// Returns a copy of @p name in which every character has been lower-cased
/// using a default-constructed std::locale.
inline std::string lowercase(const std::string & name) {
    const std::locale current;
    std::string lowered(name);
    for (char & ch : lowered) {
        ch = std::tolower(ch, current);
    }
    return lowered;
}
51
52static cl::opt<std::string>
53OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"));
54
55/** ------------------------------------------------------------------------------------------------------------- *
56 * @brief compileUnicodeSet
57 ** ------------------------------------------------------------------------------------------------------------- */
58void compileUnicodeSet(std::string name, const UnicodeSet & set, PabloCompiler & pc, Module * module) {
59    PabloFunction function = PabloFunction::Create(std::move(name));
60    Encoding encoding(Encoding::Type::UTF_8, 8);
61    CC_Compiler ccCompiler(function, encoding);
62    UCDCompiler ucdCompiler(ccCompiler);
63    PabloBuilder builder(function.getEntryBlock());
64    // Build the unicode set function
65    ucdCompiler.generateWithDefaultIfHierarchy(set, builder);
66    // Optimize it at the pablo level
67    Simplifier::optimize(function);
68    CodeSinking::optimize(function);
69    #ifdef ENABLE_MULTIPLEXING
70    AutoMultiplexing::optimize(function);
71    #endif
72    // Now compile the function ...
73    pc.compile(function, module);
74    releaseSlabAllocatorMemory();
75}
76
77/** ------------------------------------------------------------------------------------------------------------- *
78 * @brief generateUCDModule
79 ** ------------------------------------------------------------------------------------------------------------- */
80Module * generateUCDModule() {
81    PabloCompiler pc;
82    Module * module = new Module("ucd", getGlobalContext());
83    for (PropertyObject * obj : property_object_table) {
84
85        if (isa<UnsupportedPropertyObject>(obj)) continue;
86
87        if (auto * enumObj = dyn_cast<EnumeratedPropertyObject>(obj)) {
88            for (const std::string value : *enumObj) {
89                const UnicodeSet & set = enumObj->GetCodepointSet(canonicalize_value_name(value));
90                std::string name = "__get_" + property_enum_name[enumObj->getPropertyCode()] + "_" + lowercase(value);
91                compileUnicodeSet(name, set, pc, module);
92            }
93            break;
94        }
95        else if (auto * extObj = dyn_cast<ExtensionPropertyObject>(obj)) {
96            for (const std::string value : *extObj) {
97                const UnicodeSet & set = extObj->GetCodepointSet(canonicalize_value_name(value));
98                std::string name = "__get_" + property_enum_name[extObj->getPropertyCode()] + "_" + lowercase(value);
99                compileUnicodeSet(name, set, pc, module);
100            }
101        }
102        else if (auto * binObj = dyn_cast<BinaryPropertyObject>(obj)) {
103            const UnicodeSet & set = binObj->GetCodepointSet(Binary_ns::Y);
104            std::string name = "__get_" + property_enum_name[binObj->getPropertyCode()] + "_y";
105            compileUnicodeSet(name, set, pc, module);
106        }
107    }
108
109    // Print an error message if our module is malformed in any way.
110    verifyModule(*module, &dbgs());
111
112    return module;
113}
114
115/** ------------------------------------------------------------------------------------------------------------- *
116 * @brief compileUCDModule
117 ** ------------------------------------------------------------------------------------------------------------- */
118void compileUCDModule(Module * module) {
119    Triple TheTriple;
120
121    // Initialize targets first, so that --version shows registered targets.
122    InitializeAllTargets();
123    InitializeAllTargetMCs();
124    InitializeAllAsmPrinters();
125    InitializeAllAsmParsers();
126
127
128    TheTriple.setTriple(sys::getDefaultTargetTriple());
129
130    // Get the target specific parser.
131    std::string msg;
132    const Target * TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), msg);
133
134    if (TheTarget == nullptr) {
135        throw std::runtime_error(msg);
136    }
137
138    auto MCPU = llvm::sys::getHostCPUName();
139
140    TargetOptions Options;
141
142    std::unique_ptr<TargetMachine> Target(
143                TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, "", Options,
144                                               Reloc::Default, CodeModel::Small, CodeGenOpt::Aggressive));
145
146    if (Target == nullptr) {
147        throw std::runtime_error("Could not allocate target machine!");
148    }
149
150    if (OutputFilename.empty()) {
151        OutputFilename = "ucd.o";
152    }
153
154    #ifdef USE_LLVM_3_5
155    std::string error;
156    std::unique_ptr<tool_output_file> Out = make_unique<tool_output_file>(OutputFilename.c_str(), error, sys::fs::F_None);
157    if (!error.empty()) {
158        throw std::runtime_error(error);
159    }
160    #else
161    std::error_code error;
162    std::unique_ptr<tool_output_file> Out = make_unique<tool_output_file>(OutputFilename, error, sys::fs::F_None);
163    if (error) {
164        throw std::runtime_error(error.message());
165    }
166    #endif
167
168    // Build up all of the passes that we want to do to the module.
169    PassManager PM;
170
171    // Add an appropriate TargetLibraryInfo pass for the module's triple.
172    TargetLibraryInfo * TLI = new TargetLibraryInfo(TheTriple);
173
174    PM.add(TLI);
175
176    // Add the target data from the target machine, if it exists, or the module.
177    #ifdef USE_LLVM_3_5
178    const DataLayout * DL = Target->getDataLayout();
179    #else
180    const DataLayout * DL = Target->getSubtargetImpl()->getDataLayout();
181    #endif
182    if (DL) {
183        module->setDataLayout(DL);
184    }
185    PM.add(new DataLayoutPass());
186    PM.add(createReassociatePass());
187    PM.add(createInstructionCombiningPass());
188    PM.add(createSinkingPass());
189
190    formatted_raw_ostream FOS(Out->os());
191    // Ask the target to add backend passes as necessary.
192    if (Target->addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_ObjectFile)) {
193        throw std::runtime_error("Target does not support generation of this file type!\n");
194    }
195
196    PM.run(*module);
197
198    Out->keep();
199}
200
201/** ------------------------------------------------------------------------------------------------------------- *
202 * @brief main
203 ** ------------------------------------------------------------------------------------------------------------- */
204int main(int argc, char *argv[]) {
205    cl::ParseCommandLineOptions(argc, argv, "UCD Compiler\n");
206    Module * module = generateUCDModule();
207    compileUCDModule(module);
208    return 0;
209}
Note: See TracBrowser for help on using the repository browser.