source: icGREP/icgrep-devel/icgrep/generate_predefined_ucd_functions.cpp @ 4722

Last change on this file since 4722 was 4722, checked in by nmedfort, 4 years ago

Misc. changes and start of dependency chain analysis in ucd generator.

File size: 15.8 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <cc/cc_compiler.h>
8#include <UCD/unicode_set.h>
9#include <UCD/PropertyObjectTable.h>
10#include <UCD/ucd_compiler.hpp>
11#include <pablo/pablo_compiler.h>
12#include <pablo/builder.hpp>
13#include <pablo/function.h>
14#include <llvm/Support/CommandLine.h>
15#include <utf_encoding.h>
16#include <pablo/optimizers/pablo_simplifier.hpp>
17#include <pablo/optimizers/pablo_codesinking.hpp>
18#ifdef ENABLE_MULTIPLEXING
19#include <pablo/optimizers/pablo_automultiplexing.hpp>
20#endif
21#include <llvm/IR/Verifier.h>
22#include <llvm/Support/Debug.h>
23#include <llvm/Support/TargetRegistry.h>
24#include <llvm/Support/TargetSelect.h>
25#include <llvm/Target/TargetLibraryInfo.h>
26#include <llvm/Target/TargetMachine.h>
27#include <llvm/Support/Host.h>
28#include <llvm/ADT/Triple.h>
29#include <llvm/Support/ToolOutputFile.h>
30#include <llvm/Pass.h>
31#include <llvm/PassManager.h>
32#include <llvm/ADT/STLExtras.h>
33#include <llvm/Target/TargetSubtargetInfo.h>
34#include <llvm/Support/FormattedStream.h>
35#include "llvm/Support/FileSystem.h"
36#include <llvm/Transforms/Scalar.h>
37#include <llvm/Support/raw_ostream.h>
38#include <llvm/Analysis/DependenceAnalysis.h>
39
40#include <queue>
41#include <unordered_map>
42
43using namespace pablo;
44using namespace UCD;
45using namespace cc;
46using namespace llvm;
47
48static cl::opt<std::string>
49ObjectFilename("o", cl::desc("Output object filename"), cl::value_desc("filename"), cl::Required);
50
51static cl::opt<std::string>
52UCDSourcePath("dir", cl::desc("UCD source code directory"), cl::value_desc("directory"), cl::Required);
53
54static cl::opt<bool> PrintDependenceAnalysis("pablo-ldc", cl::init(false), cl::desc("print Pablo longest dependency chain metrics."));
55
56
57#ifdef ENABLE_MULTIPLEXING
58static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(false),
59    cl::desc("combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."));
60
61static cl::opt<std::string>
62MultiplexingDistribution("multiplexing-dist",
63    cl::desc("Generate a CSV containing the # of Advances found in each UCD function before and after applying multiplexing."), cl::value_desc("filename"));
64
65static raw_fd_ostream * MultiplexingDistributionFile = nullptr;
66#endif
67
68using property_list = std::vector<std::pair<std::string, size_t>>;
69
70/** ------------------------------------------------------------------------------------------------------------- *
71 * @brief getNumOfAdvances
72 ** ------------------------------------------------------------------------------------------------------------- */
73unsigned getNumOfAdvances(const PabloBlock & entry) {
74    unsigned advances = 0;
75    for (const Statement * stmt : entry ) {
76        if (isa<Advance>(stmt)) {
77            ++advances;
78        }
79        else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
80            advances += getNumOfAdvances(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody());
81        }
82    }
83    return advances;
84}
85
86/** ------------------------------------------------------------------------------------------------------------- *
87 * @brief computePabloDependencyMetrics
88 ** ------------------------------------------------------------------------------------------------------------- */
89void computePabloDependencyChainMetrics(const PabloFunction & f) {
90
91    std::queue<const PabloAST *> Q;
92    std::unordered_map<const PabloAST *, unsigned> V;
93
94    for (unsigned i = 0; i != f.getNumOfResults(); ++i) {
95        V.insert(std::make_pair(f.getResult(i), 0));
96        const PabloAST * expr = f.getResult(i)->getExpression();
97        if (expr->getNumUses() == 1 && V.count(expr) == 0) {
98            V.insert(std::make_pair(expr, 1));
99            if (LLVM_LIKELY(isa<Statement>(expr))) {
100                Q.push(cast<Statement>(expr));
101            }
102        }
103    }
104
105    while (!Q.empty()) {
106        const PabloAST * expr = Q.front(); Q.pop();
107        unsigned lpl = 0; // longest path length
108        for (const PabloAST * user : expr->users()) {
109            lpl = std::max<unsigned>(lpl, V[user]);
110        }
111        V.insert(std::make_pair(expr, lpl + 1));
112        if (const Statement * stmt = dyn_cast<Statement>(expr)) {
113            for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
114                assert (V.count(stmt->getOperand(i)) == 0);
115                bool everyUserOfThisOperandWasProcessed = true;
116                for (const PabloAST * user : stmt->getOperand(i)->users()) {
117                    if (V.count(user) == 0) {
118                        everyUserOfThisOperandWasProcessed = false;
119                        break;
120                    }
121                }
122                if (everyUserOfThisOperandWasProcessed) {
123                    Q.push(stmt->getOperand(i));
124                }
125            }
126        }
127    }
128
129    unsigned lpl = 0;
130    for (unsigned i = 0; i != f.getNumOfParameters(); ++i) {
131        assert (V.count(f.getParameter(i)));
132        lpl = std::max<unsigned>(lpl, V[f.getParameter(i)]);
133    }
134
135
136
137}
138
139/** ------------------------------------------------------------------------------------------------------------- *
140 * @brief computeLLVMDependencyMetrics
141 ** ------------------------------------------------------------------------------------------------------------- */
142void computeLLVMDependencyChainMetrics(const llvm::Function & f) {
143
144
145
146}
147
148/** ------------------------------------------------------------------------------------------------------------- *
149 * @brief compileUnicodeSet
150 ** ------------------------------------------------------------------------------------------------------------- */
151size_t compileUnicodeSet(std::string name, const UnicodeSet & set, PabloCompiler & pc, Module * module) {
152    #ifdef ENABLE_MULTIPLEXING
153    if (MultiplexingDistributionFile) {
154        (*MultiplexingDistributionFile) << name;
155    }
156    #endif
157    PabloFunction function = PabloFunction::Create(std::move(name), 8, 1);
158    Encoding encoding(Encoding::Type::UTF_8, 8);
159    CC_Compiler ccCompiler(function, encoding);
160    UCDCompiler ucdCompiler(ccCompiler);
161    PabloBuilder builder(function.getEntryBlock());
162    // Build the unicode set function
163    function.setResult(0, builder.createAssign("matches", ucdCompiler.generateWithDefaultIfHierarchy(set, builder)));
164    // Optimize it at the pablo level
165    Simplifier::optimize(function);
166    CodeSinking::optimize(function);
167    #ifdef ENABLE_MULTIPLEXING
168    if (EnableMultiplexing) {
169        if (MultiplexingDistributionFile) {
170            (*MultiplexingDistributionFile) << ',' << getNumOfAdvances(function.getEntryBlock());
171        }
172        AutoMultiplexing::optimize(function);
173        Simplifier::optimize(function);
174        if (MultiplexingDistributionFile) {
175            (*MultiplexingDistributionFile) << ',' << getNumOfAdvances(function.getEntryBlock()) << '\n';
176        }
177    }
178    #endif
179    // Now compile the function ...
180    auto func = pc.compile(function, module);
181    releaseSlabAllocatorMemory();
182
183    return func.second;
184}
185
186/** ------------------------------------------------------------------------------------------------------------- *
187 * @brief writePropertyInstaller
188 ** ------------------------------------------------------------------------------------------------------------- */
189
190void writePrecompiledProperties(property_list && properties) {
191
192    const std::string headerFilename = UCDSourcePath + "/precompiled_properties.h";
193    #ifdef USE_LLVM_3_5
194    std::string error;
195    raw_fd_ostream header(headerFilename.c_str(), error, sys::fs::F_None);
196    if (!error.empty()) {
197        throw std::runtime_error(error);
198    }
199    #else
200    std::error_code error;
201    raw_fd_ostream header(headerFilename, error, sys::fs::F_None);
202    if (error) {
203        throw std::runtime_error(error.message());
204    }
205    #endif
206
207    header << "#ifndef PRECOMPILED_PROPERTIES\n";
208    header << "#define PRECOMPILED_PROPERTIES\n\n";
209    header << "#include <string>\n\n";
210    header << "#include <tuple>\n";
211    header << "namespace UCD {\n\n";
212    header << "using ExternalProperty = std::tuple<void *, unsigned, unsigned, size_t>;\n\n";
213    header << "const ExternalProperty & resolveExternalProperty(const std::string & name);\n\n";
214    header << "}\n\n";
215    header << "#endif\n";
216    header.close();
217
218    const std::string cppFilename = UCDSourcePath + "/precompiled_properties.cpp";
219    #ifdef USE_LLVM_3_5
220    raw_fd_ostream cpp(cppFilename.c_str(), error, sys::fs::F_None);
221    if (!error.empty()) {
222        throw std::runtime_error(error);
223    }
224    #else
225    raw_fd_ostream cpp(cppFilename, error, sys::fs::F_None);
226    if (error) {
227        throw std::runtime_error(error.message());
228    }
229    #endif
230
231    cpp << "#include \"precompiled_properties.h\"\n";
232    cpp << "#include <include/simd-lib/bitblock.hpp>\n";
233    cpp << "#include <stdexcept>\n";
234    cpp << "#include <unordered_map>\n\n";
235    cpp << "namespace UCD {\nnamespace {\n\n";
236    cpp << "struct Input {\n    BitBlock bit[8];\n};\n\n";
237    cpp << "struct Output {\n    BitBlock bit[1];\n};\n\n";
238    for (auto prop : properties) {
239        cpp << "extern \"C\" void " + prop.first + "(const Input &, BitBlock *, Output &);\n";
240    }
241
242    cpp << "\nconst static std::unordered_map<std::string, ExternalProperty> EXTERNAL_UCD_PROPERTY_MAP = {\n";
243    for (auto itr = properties.begin(); itr != properties.end(); ) {
244        cpp << "    {\"" + itr->first + "\", std::make_tuple(reinterpret_cast<void *>(&" + itr->first + "), 8, 1, " + std::to_string(itr->second) + ")}";
245        if (++itr != properties.end()) {
246            cpp << ",";
247        }
248        cpp << "\n";
249    }
250    cpp << "};\n\n} // end of anonymous namespace\n\n";
251
252    cpp << "const ExternalProperty & resolveExternalProperty(const std::string & name) {\n";
253    cpp << "    auto f = EXTERNAL_UCD_PROPERTY_MAP.find(name);\n";
254    cpp << "    if (f == EXTERNAL_UCD_PROPERTY_MAP.end())\n";
255    cpp << "        throw std::runtime_error(\"No external property named \\\"\" + name + \"\\\" found!\");\n";
256    cpp << "    return f->second;\n";
257    cpp << "}\n\n} // end of UCD namespace\n";
258
259    cpp.close();
260
261}
262
263/** ------------------------------------------------------------------------------------------------------------- *
264 * @brief generateUCDModule
265 ** ------------------------------------------------------------------------------------------------------------- */
266Module * generateUCDModule() {
267
268    property_list properties;
269
270    PabloCompiler pc;
271    Module * module = new Module("ucd", getGlobalContext());
272    for (PropertyObject * obj : property_object_table) {
273        if (EnumeratedPropertyObject * enumObj = dyn_cast<EnumeratedPropertyObject>(obj)) {
274            for (const std::string value : *enumObj) {
275                const UnicodeSet & set = enumObj->GetCodepointSet(canonicalize_value_name(value));
276                std::string name = "__get_" + property_enum_name[enumObj->getPropertyCode()] + "_" + value;
277                properties.emplace_back(name, compileUnicodeSet(name, set, pc, module));
278            }
279        }
280        else if (ExtensionPropertyObject * extObj = dyn_cast<ExtensionPropertyObject>(obj)) {
281            for (const std::string value : *extObj) {
282                const UnicodeSet & set = extObj->GetCodepointSet(canonicalize_value_name(value));
283                std::string name = "__get_" + property_enum_name[extObj->getPropertyCode()] + "_" + value;
284                properties.emplace_back(name, compileUnicodeSet(name, set, pc, module));
285            }
286        }
287        else if (BinaryPropertyObject * binObj = dyn_cast<BinaryPropertyObject>(obj)) {
288            const UnicodeSet & set = binObj->GetCodepointSet(Binary_ns::Y);
289            std::string name = "__get_" + property_enum_name[binObj->getPropertyCode()] + "_Y";
290            properties.emplace_back(name, compileUnicodeSet(name, set, pc, module));
291        }
292    }
293
294    // Print an error message if our module is malformed in any way.
295    verifyModule(*module, &dbgs());
296
297    writePrecompiledProperties(std::move(properties));
298
299    return module;
300}
301
302/** ------------------------------------------------------------------------------------------------------------- *
303 * @brief compileUCDModule
304 ** ------------------------------------------------------------------------------------------------------------- */
305void compileUCDModule(Module * module) {
306    Triple TheTriple;
307
308    TheTriple.setTriple(sys::getDefaultTargetTriple());
309
310    // Get the target specific parser.
311    std::string msg;
312    const Target * TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), msg);
313    if (TheTarget == nullptr) {
314        throw std::runtime_error(msg);
315    }
316
317    TargetOptions Options;
318
319    std::unique_ptr<TargetMachine> Target(
320                TheTarget->createTargetMachine(TheTriple.getTriple(), sys::getHostCPUName(), "", Options,
321                                               Reloc::Default, CodeModel::Small, CodeGenOpt::Aggressive));
322
323    if (Target == nullptr) {
324        throw std::runtime_error("Could not allocate target machine!");
325    }
326
327    #ifdef USE_LLVM_3_5
328    std::string error;
329    std::unique_ptr<tool_output_file> out = make_unique<tool_output_file>(ObjectFilename.c_str(), error, sys::fs::F_None);
330    if (!error.empty()) {
331        throw std::runtime_error(error);
332    }
333    #else
334    std::error_code error;
335    std::unique_ptr<tool_output_file> out = make_unique<tool_output_file>(ObjectFilename, error, sys::fs::F_None);
336    if (error) {
337        throw std::runtime_error(error.message());
338    }
339    #endif
340
341    // Build up all of the passes that we want to do to the module.
342    PassManager PM;
343
344    // Add an appropriate TargetLibraryInfo pass for the module's triple.
345    PM.add(new TargetLibraryInfo(TheTriple));
346
347    // Add the target data from the target machine, if it exists, or the module.
348    #ifdef USE_LLVM_3_5
349    const DataLayout * DL = Target->getDataLayout();
350    #else
351    const DataLayout * DL = Target->getSubtargetImpl()->getDataLayout();
352    #endif
353    if (DL) {
354        module->setDataLayout(DL);
355    }
356    #ifdef USE_LLVM_3_5
357    PM.add(new DataLayoutPass(module));
358    #else
359    PM.add(new DataLayoutPass());
360    #endif   
361    PM.add(createReassociatePass());
362    PM.add(createInstructionCombiningPass());
363    PM.add(createSinkingPass());
364
365    formatted_raw_ostream outStream(out->os());
366    // Ask the target to add backend passes as necessary.
367    if (Target->addPassesToEmitFile(PM, outStream, TargetMachine::CGFT_ObjectFile)) {
368        throw std::runtime_error("Target does not support generation of object file type!\n");
369    }
370
371    PM.run(*module);
372
373
374    out->keep();
375}
376
377/** ------------------------------------------------------------------------------------------------------------- *
378 * @brief main
379 ** ------------------------------------------------------------------------------------------------------------- */
380int main(int argc, char *argv[]) {
381    // Initialize targets first, so that --version shows registered targets.
382    InitializeAllTargets();
383    InitializeAllTargetMCs();
384    InitializeAllAsmPrinters();
385    InitializeAllAsmParsers();
386    cl::ParseCommandLineOptions(argc, argv, "UCD Compiler\n");
387
388
389
390
391    #ifdef ENABLE_MULTIPLEXING
392    if (MultiplexingDistribution.length() > 0) {
393        #ifdef USE_LLVM_3_5
394        std::string error;
395        MultiplexingDistributionFile = new raw_fd_ostream(MultiplexingDistribution.c_str(), error, sys::fs::F_Text);
396        if (!error.empty()) {
397            throw std::runtime_error(error);
398        }
399        #else
400        std::error_code error;
401        MultiplexingDistributionFile = new raw_fd_ostream(MultiplexingDistribution, error, sys::fs::F_Text);
402        if (error) {
403            throw std::runtime_error(error.message());
404        }
405        #endif
406    }
407    #endif
408    Module * module = generateUCDModule();
409    #ifdef ENABLE_MULTIPLEXING
410    if (MultiplexingDistributionFile) {
411        MultiplexingDistributionFile->close();
412        delete MultiplexingDistributionFile;
413    }
414    #endif
415    compileUCDModule(module);
416    return 0;
417}
Note: See TracBrowser for help on using the repository browser.