source: icGREP/icgrep-devel/icgrep/toolchain.cpp @ 5361

Last change on this file since 5361 was 5361, checked in by nmedfort, 3 years ago

Work on non-carry collapsing mode.

File size: 9.0 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
9#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
10#include <llvm/Support/CommandLine.h>              // for OptionCategory
11#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
12#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
13#include <llvm/ADT/SmallString.h>                  // for SmallString
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
15#include <llvm/IR/Verifier.h>
16#include <llvm/InitializePasses.h>                 // for initializeCodeGen
17#include <llvm/PassRegistry.h>                     // for PassRegistry
18#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
19#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
20#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
21#include <llvm/Target/TargetOptions.h>             // for TargetOptions
22#include <llvm/Transforms/Scalar.h>
23#include <llvm/Transforms/Utils/Local.h>
24#include <object_cache.h>
25#ifdef CUDA_ENABLED
26#include <IR_Gen/llvm2ptx.h>
27#endif
28 
29using namespace llvm;
30
31namespace codegen {
32
33static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
34
35static cl::bits<DebugFlags>
36DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
37#if LLVM_VERSION_MINOR > 6
38                        clEnumVal(ShowASM, "Print assembly code."),
39#endif
40                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
41                        clEnumValEnd), cl::cat(CodeGenOptions));
42
43static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
44#if LLVM_VERSION_MINOR > 6
45static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
46static cl::opt<bool> AsmVerbose("asm-verbose",
47                                cl::desc("Add comments to directives."),
48                                cl::init(true), cl::cat(CodeGenOptions));
49#endif
50
51char OptLevel;
52static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
53                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
54
55
56static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(false), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
57
58static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
59
60
61int BlockSize;
62int SegmentSize;
63int BufferSegments;
64int ThreadNum;
65bool EnableAsserts;
66#ifndef NDEBUG
67#define DEFAULT_TO_TRUE_IN_DEBUG_MODE true
68#else
69#define DEFAULT_TO_TRUE_IN_DEBUG_MODE false
70#endif
71
72static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
73static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
74static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
75static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
76static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(DEFAULT_TO_TRUE_IN_DEBUG_MODE));
77
78const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
79
80bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
81
82   
83#ifdef CUDA_ENABLED
84bool NVPTX;
85int GroupNum;
86static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
87static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
88#endif
89
90}
91
92
93#ifdef CUDA_ENABLED
94void setNVPTXOption(){
95    codegen::NVPTX = codegen::USENVPTX;
96}
97
98void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
99    InitializeAllTargets();
100    InitializeAllTargetMCs();
101    InitializeAllAsmPrinters();
102    InitializeAllAsmParsers();
103
104    PassRegistry *Registry = PassRegistry::getPassRegistry();
105    initializeCore(*Registry);
106    initializeCodeGen(*Registry);
107    initializeLoopStrengthReducePass(*Registry);
108    initializeLowerIntrinsicsPass(*Registry);
109    initializeUnreachableBlockElimPass(*Registry);
110
111    std::error_code error;
112    raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
113    m->print(out, nullptr);
114
115    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
116            m->dump();
117
118    llvm2ptx(IRFilename, PTXFilename);
119}
120#endif
121
122
123void setAllFeatures(EngineBuilder &builder) {
124    StringMap<bool> HostCPUFeatures;
125    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
126        std::vector<std::string> attrs;
127        for (auto &flag : HostCPUFeatures) {
128            auto enabled = flag.second ? "+" : "-";
129            attrs.push_back(enabled + flag.first().str());
130        }
131        builder.setMAttrs(attrs);
132    }
133}
134
135bool AVX2_available() {
136    StringMap<bool> HostCPUFeatures;
137    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
138        auto f = HostCPUFeatures.find("avx2");
139        return ((f != HostCPUFeatures.end()) && f->second);
140    }
141    return false;
142}
143
144#ifndef USE_LLVM_3_6
145void WriteAssembly (TargetMachine *TM, Module * m) {
146    legacy::PassManager PM;
147
148    SmallString<128> Str;
149    raw_svector_ostream dest(Str);
150
151    if (TM->addPassesToEmitFile(PM, dest, TargetMachine::CGFT_AssemblyFile ) ) {
152        throw std::runtime_error("LLVM error: addPassesToEmitFile failed.");
153    }
154    PM.run(*m);
155
156    if (codegen::ASMOutputFilename.empty()) {
157        errs() << Str;
158    } else {
159        std::error_code error;
160        raw_fd_ostream out(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
161        out << Str;
162    }
163}
164#endif
165
166ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
167
168    // Use the pass manager to optimize the function.
169    legacy::PassManager PM;
170    #ifndef NDEBUG
171    PM.add(createVerifierPass());
172    #endif
173    PM.add(createReassociatePass());             //Reassociate expressions.
174    PM.add(createGVNPass());                     //Eliminate common subexpressions.
175    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
176    PM.add(createCFGSimplificationPass());   
177    PM.run(*m);
178
179    InitializeNativeTarget();
180    InitializeNativeTargetAsmPrinter();
181    InitializeNativeTargetAsmParser();
182
183    PassRegistry * Registry = PassRegistry::getPassRegistry();
184    initializeCore(*Registry);
185    initializeCodeGen(*Registry);
186    initializeLowerIntrinsicsPass(*Registry);
187
188    std::string errMessage;
189    EngineBuilder builder{std::unique_ptr<Module>(m)};
190    builder.setErrorStr(&errMessage);
191    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
192    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
193
194    builder.setTargetOptions(opts);
195    builder.setVerifyModules(true);
196    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
197    switch (codegen::OptLevel) {
198        case '0': optLevel = CodeGenOpt::None; break;
199        case '1': optLevel = CodeGenOpt::Less; break;
200        case '2': optLevel = CodeGenOpt::Default; break;
201        case '3': optLevel = CodeGenOpt::Aggressive; break;
202        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
203    }
204    builder.setOptLevel(optLevel);
205
206    setAllFeatures(builder);
207
208    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
209        if (codegen::IROutputFilename.empty()) {
210            m->dump();
211        } else {
212            std::error_code error;
213            raw_fd_ostream out(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
214            m->print(out, nullptr);
215        }
216    }
217#if LLVM_VERSION_MINOR > 6
218    if (codegen::DebugOptionIsSet(codegen::ShowASM)) {
219        WriteAssembly(builder.selectTarget(), m);
220    }
221#endif
222    ExecutionEngine * engine = builder.create();
223    if (engine == nullptr) {
224        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
225    }
226    return engine;
227}
228
229void ApplyObjectCache(ExecutionEngine * e) {
230    ICGrepObjectCache * cache = nullptr;
231    if (codegen::EnableObjectCache) {
232        if (codegen::ObjectCacheDir.empty())
233            // Default is $HOME/.cache/icgrep
234            cache = new ICGrepObjectCache();
235        else
236            cache = new ICGrepObjectCache(codegen::ObjectCacheDir);
237        e->setObjectCache(cache);
238    }
239}
240
241
Note: See TracBrowser for help on using the repository browser.