source: icGREP/icgrep-devel/icgrep/toolchain.cpp @ 5351

Last change on this file since 5351 was 5351, checked in by nmedfort, 2 years ago

Update to BlockOrientedKernel to move the indirect branch out of the StrideLoopBody and into StrideLoopDone to simplify branch prediction.

File size: 8.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
9#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
10#include <llvm/Support/CommandLine.h>              // for OptionCategory
11#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
12#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
13#include <llvm/ADT/SmallString.h>                  // for SmallString
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
15#include <llvm/IR/Verifier.h>
16#include <llvm/InitializePasses.h>                 // for initializeCodeGen
17#include <llvm/PassRegistry.h>                     // for PassRegistry
18#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
19#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
20#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
21#include <llvm/Target/TargetOptions.h>             // for TargetOptions
22#include <llvm/Transforms/Scalar.h>
23#include <llvm/Transforms/Utils/Local.h>
24#include <object_cache.h>
25#ifdef CUDA_ENABLED
26#include <IR_Gen/llvm2ptx.h>
27#endif
28 
29using namespace llvm;
30
31namespace codegen {
32
33static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
34
35static cl::bits<DebugFlags>
36DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
37#if LLVM_VERSION_MINOR > 6
38                        clEnumVal(ShowASM, "Print assembly code."),
39#endif
40                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
41                        clEnumValEnd), cl::cat(CodeGenOptions));
42
43static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
44#if LLVM_VERSION_MINOR > 6
45static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
46static cl::opt<bool> AsmVerbose("asm-verbose",
47                                cl::desc("Add comments to directives."),
48                                cl::init(true), cl::cat(CodeGenOptions));
49#endif
50
51char OptLevel;
52static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
53                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
54
55
56static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(false), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
57
58static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
59
60
61int BlockSize;
62int SegmentSize;
63int BufferSegments;
64int ThreadNum;
65bool EnableAsserts;
66
67
68
69static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
70static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
71static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
72static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
73
74static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(
75#ifndef NDEBUG
76true
77#else
78false
79#endif
80));
81
82const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
83
84bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
85
86   
87#ifdef CUDA_ENABLED
88bool NVPTX;
89int GroupNum;
90static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
91static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
92#endif
93
94}
95
96
97#ifdef CUDA_ENABLED
98void setNVPTXOption(){
99    codegen::NVPTX = codegen::USENVPTX;
100}
101
102void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
103    InitializeAllTargets();
104    InitializeAllTargetMCs();
105    InitializeAllAsmPrinters();
106    InitializeAllAsmParsers();
107
108    PassRegistry *Registry = PassRegistry::getPassRegistry();
109    initializeCore(*Registry);
110    initializeCodeGen(*Registry);
111    initializeLoopStrengthReducePass(*Registry);
112    initializeLowerIntrinsicsPass(*Registry);
113    initializeUnreachableBlockElimPass(*Registry);
114
115    std::error_code error;
116    raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
117    m->print(out, nullptr);
118
119    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
120            m->dump();
121
122    llvm2ptx(IRFilename, PTXFilename);
123}
124#endif
125
126
127void setAllFeatures(EngineBuilder &builder) {
128    StringMap<bool> HostCPUFeatures;
129    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
130        std::vector<std::string> attrs;
131        for (auto &flag : HostCPUFeatures) {
132            auto enabled = flag.second ? "+" : "-";
133            attrs.push_back(enabled + flag.first().str());
134        }
135        builder.setMAttrs(attrs);
136    }
137}
138
139bool AVX2_available() {
140    StringMap<bool> HostCPUFeatures;
141    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
142        auto f = HostCPUFeatures.find("avx2");
143        return ((f != HostCPUFeatures.end()) && f->second);
144    }
145    return false;
146}
147
// Guarded by the same condition as ASMOutputFilename and the call site in
// JIT_to_ExecutionEngine, so this function is only compiled when the option
// it reads exists.
#if LLVM_VERSION_MINOR > 6
/*
 * Generates assembly for module m with target machine TM and writes it to
 * the file named by -asm-output, or to errs() when that option is empty.
 *
 *  TM  target machine used to build the emission passes; not owned here
 *  m   module the emission passes are run on
 *
 * Throws std::runtime_error if TM is null, if the target cannot emit an
 * assembly file, or if the output file cannot be opened.
 */
void WriteAssembly (TargetMachine *TM, Module * m) {
    if (TM == nullptr) {
        throw std::runtime_error("LLVM error: no target machine available for assembly output.");
    }

    legacy::PassManager PM;

    SmallString<128> Str;
    raw_svector_ostream dest(Str);

    if (TM->addPassesToEmitFile(PM, dest, TargetMachine::CGFT_AssemblyFile ) ) {
        throw std::runtime_error("LLVM error: addPassesToEmitFile failed.");
    }
    PM.run(*m);

    // Read through the stream's str() rather than the backing buffer so that
    // anything the stream may still be holding is included.
    const StringRef assembly = dest.str();

    if (codegen::ASMOutputFilename.empty()) {
        errs() << assembly;
    } else {
        const std::string & path = codegen::ASMOutputFilename;
        std::error_code error;
        raw_fd_ostream out(path, error, sys::fs::OpenFlags::F_None);
        if (error) {
            throw std::runtime_error("Could not open assembly output file " + path + ": " + error.message());
        }
        out << assembly;
    }
}
#endif
169
170ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
171
172    // Use the pass manager to optimize the function.
173    legacy::PassManager PM;
174    #ifndef NDEBUG
175    PM.add(createVerifierPass());
176    #endif
177    PM.add(createReassociatePass());             //Reassociate expressions.
178    PM.add(createGVNPass());                     //Eliminate common subexpressions.
179    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
180    PM.add(createCFGSimplificationPass());
181    PM.run(*m);
182
183    InitializeNativeTarget();
184    InitializeNativeTargetAsmPrinter();
185    InitializeNativeTargetAsmParser();
186
187    PassRegistry * Registry = PassRegistry::getPassRegistry();
188    initializeCore(*Registry);
189    initializeCodeGen(*Registry);
190    initializeLowerIntrinsicsPass(*Registry);
191
192    std::string errMessage;
193    EngineBuilder builder{std::unique_ptr<Module>(m)};
194    builder.setErrorStr(&errMessage);
195    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
196    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
197
198    builder.setTargetOptions(opts);
199    builder.setVerifyModules(true);
200    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
201    switch (codegen::OptLevel) {
202        case '0': optLevel = CodeGenOpt::None; break;
203        case '1': optLevel = CodeGenOpt::Less; break;
204        case '2': optLevel = CodeGenOpt::Default; break;
205        case '3': optLevel = CodeGenOpt::Aggressive; break;
206        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
207    }
208    builder.setOptLevel(optLevel);
209
210    setAllFeatures(builder);
211
212    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
213        if (codegen::IROutputFilename.empty()) {
214            m->dump();
215        } else {
216            std::error_code error;
217            raw_fd_ostream out(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
218            m->print(out, nullptr);
219        }
220    }
221#if LLVM_VERSION_MINOR > 6
222    if (codegen::DebugOptionIsSet(codegen::ShowASM)) {
223      WriteAssembly(builder.selectTarget(), m);
224    }
225#endif
226    ExecutionEngine * engine = builder.create();
227    if (engine == nullptr) {
228        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
229    }   
230    return engine;
231}
232
233void ApplyObjectCache(ExecutionEngine * e) {
234    ICGrepObjectCache * cache = nullptr;
235    if (codegen::EnableObjectCache) {
236        if (codegen::ObjectCacheDir.empty())
237            // Default is $HOME/.cache/icgrep
238            cache = new ICGrepObjectCache();
239        else
240            cache = new ICGrepObjectCache(codegen::ObjectCacheDir);
241        e->setObjectCache(cache);
242    }   
243}
244
245
Note: See TracBrowser for help on using the repository browser.