source: icGREP/icgrep-devel/icgrep/toolchain.cpp @ 5350

Last change on this file since 5350 was 5350, checked in by nmedfort, 2 years ago

First attempt at inlining all DoBlock and FinalBlock functions by using indirect jumps. Disabled for NVPTX until Linda can check whether they're supported by the LLVM NVPTX library.

File size: 9.0 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
9#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
10#include <llvm/Support/CommandLine.h>              // for OptionCategory
11#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
12#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
13#include <llvm/ADT/SmallString.h>                  // for SmallString
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
15#include <llvm/InitializePasses.h>                 // for initializeCodeGen
16#include <llvm/PassRegistry.h>                     // for PassRegistry
17#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
18#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
19#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
20#include <llvm/Target/TargetOptions.h>             // for TargetOptions
21#include <llvm/Transforms/Scalar.h>
22#include <llvm/Transforms/Utils/Local.h>
23#ifndef NDEBUG
24#include <llvm/IR/Verifier.h>
25#endif
26#include <object_cache.h>
27namespace llvm { class Module; }
28#ifdef CUDA_ENABLED
29#include <IR_Gen/llvm2ptx.h>
30#endif
31 
32using namespace llvm;
33
34namespace codegen {
35
36static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
37
38static cl::bits<DebugFlags>
39DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
40#if LLVM_VERSION_MINOR > 6
41                        clEnumVal(ShowASM, "Print assembly code."),
42#endif
43                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
44                        clEnumValEnd), cl::cat(CodeGenOptions));
45
46static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
47#if LLVM_VERSION_MINOR > 6
48static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
49static cl::opt<bool> AsmVerbose("asm-verbose",
50                                cl::desc("Add comments to directives."),
51                                cl::init(true), cl::cat(CodeGenOptions));
52#endif
53
54char OptLevel;
55static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
56                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
57
58
59static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(false), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
60
61static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
62
63
64int BlockSize;
65int SegmentSize;
66int BufferSegments;
67int ThreadNum;
68bool EnableAsserts;
69
70static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
71static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
72static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
73static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
74
75static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(
76#ifndef NDEBUG
77true
78#else
79false
80#endif
81));
82
83const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
84
85bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
86
87   
88#ifdef CUDA_ENABLED
89bool NVPTX;
90int GroupNum;
91static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
92static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
93#endif
94
95}
96
97
98#ifdef CUDA_ENABLED
99void setNVPTXOption(){
100    codegen::NVPTX = codegen::USENVPTX;
101}
102
103void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
104    InitializeAllTargets();
105    InitializeAllTargetMCs();
106    InitializeAllAsmPrinters();
107    InitializeAllAsmParsers();
108
109    PassRegistry *Registry = PassRegistry::getPassRegistry();
110    initializeCore(*Registry);
111    initializeCodeGen(*Registry);
112    initializeLoopStrengthReducePass(*Registry);
113    initializeLowerIntrinsicsPass(*Registry);
114    initializeUnreachableBlockElimPass(*Registry);
115
116    std::error_code error;
117    raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
118    m->print(out, nullptr);
119
120    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
121            m->dump();
122
123    llvm2ptx(IRFilename, PTXFilename);
124}
125#endif
126
127
128void setAllFeatures(EngineBuilder &builder) {
129    StringMap<bool> HostCPUFeatures;
130    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
131        std::vector<std::string> attrs;
132        for (auto &flag : HostCPUFeatures) {
133            auto enabled = flag.second ? "+" : "-";
134            attrs.push_back(enabled + flag.first().str());
135        }
136        builder.setMAttrs(attrs);
137    }
138}
139
140bool AVX2_available() {
141    StringMap<bool> HostCPUFeatures;
142    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
143        auto f = HostCPUFeatures.find("avx2");
144        return ((f != HostCPUFeatures.end()) && f->second);
145    }
146    return false;
147}
148
// Guarded by the same condition as codegen::ASMOutputFilename and as the call
// site in JIT_to_ExecutionEngine, both of which this function depends on.
#if LLVM_VERSION_MINOR > 6
// Runs the target's assembly-emission passes over module m and writes the
// resulting listing to the file named by -asm-output, or to errs() when that
// option is empty.
//
// TM - target machine used to emit the assembly; must not be null.
// m  - module to emit; the emission passes are run on it directly.
//
// Throws std::runtime_error if TM is null, if the target cannot emit an
// assembly file, or if the output file cannot be opened.
void WriteAssembly (TargetMachine *TM, Module * m) {
    if (TM == nullptr) {
        throw std::runtime_error("LLVM error: no target machine available for assembly output.");
    }

    legacy::PassManager PM;

    SmallString<128> Str;
    raw_svector_ostream dest(Str);

    if (TM->addPassesToEmitFile(PM, dest, TargetMachine::CGFT_AssemblyFile ) ) {
        throw std::runtime_error("LLVM error: addPassesToEmitFile failed.");
    }
    PM.run(*m);

    // Read the listing through the stream rather than the backing buffer.
    // NOTE(review): some LLVM releases buffer raw_svector_ostream - confirm
    // that str() is sufficient for every supported version.
    const StringRef listing = dest.str();

    const std::string & filename = codegen::ASMOutputFilename;
    if (filename.empty()) {
        errs() << listing;
    } else {
        std::error_code error;
        raw_fd_ostream out(filename, error, sys::fs::OpenFlags::F_None);
        if (error) {
            throw std::runtime_error("Cannot open assembly output file " + filename + ": " + error.message());
        }
        out << listing;
    }
}
#endif
170
171ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
172
173    // Use the pass manager to optimize the function.
174    legacy::PassManager PM;
175    #ifndef NDEBUG
176    PM.add(createVerifierPass());
177    #endif
178    PM.add(createReassociatePass());             //Reassociate expressions.
179    PM.add(createGVNPass());                     //Eliminate common subexpressions.
180    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
181    PM.add(createCFGSimplificationPass());
182    PM.run(*m);
183
184    InitializeNativeTarget();
185    InitializeNativeTargetAsmPrinter();
186    InitializeNativeTargetAsmParser();
187
188    PassRegistry * Registry = PassRegistry::getPassRegistry();
189    initializeCore(*Registry);
190    initializeCodeGen(*Registry);
191    initializeLowerIntrinsicsPass(*Registry);
192
193    std::string errMessage;
194    EngineBuilder builder{std::unique_ptr<Module>(m)};
195    builder.setErrorStr(&errMessage);
196    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
197    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
198
199    builder.setTargetOptions(opts);
200    builder.setVerifyModules(true);
201    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
202    switch (codegen::OptLevel) {
203        case '0': optLevel = CodeGenOpt::None; break;
204        case '1': optLevel = CodeGenOpt::Less; break;
205        case '2': optLevel = CodeGenOpt::Default; break;
206        case '3': optLevel = CodeGenOpt::Aggressive; break;
207        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
208    }
209    builder.setOptLevel(optLevel);
210
211    setAllFeatures(builder);
212
213    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
214        if (codegen::IROutputFilename.empty()) {
215            m->dump();
216        } else {
217            std::error_code error;
218            raw_fd_ostream out(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
219            m->print(out, nullptr);
220        }
221    }
222#if LLVM_VERSION_MINOR > 6
223    if (codegen::DebugOptionIsSet(codegen::ShowASM)) {
224      WriteAssembly(builder.selectTarget(), m);
225    }
226#endif
227    ExecutionEngine * engine = builder.create();
228    if (engine == nullptr) {
229        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
230    }   
231    return engine;
232}
233
234void ApplyObjectCache(ExecutionEngine * e) {
235    ICGrepObjectCache * cache = nullptr;
236    if (codegen::EnableObjectCache) {
237        if (codegen::ObjectCacheDir.empty())
238            // Default is $HOME/.cache/icgrep
239            cache = new ICGrepObjectCache();
240        else
241            cache = new ICGrepObjectCache(codegen::ObjectCacheDir);
242        e->setObjectCache(cache);
243    }   
244}
245
246
Note: See TracBrowser for help on using the repository browser.