source: icGREP/icgrep-devel/icgrep/toolchain.cpp @ 5387

Last change on this file since 5387 was 5377, checked in by nmedfort, 2 years ago

Support for stdin. Needs more testing.

File size: 10.1 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
9#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
10#include <llvm/Support/CommandLine.h>              // for OptionCategory
11#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
12#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
13#include <llvm/ADT/SmallString.h>                  // for SmallString
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
15#include <llvm/IR/Verifier.h>
16#include <llvm/InitializePasses.h>                 // for initializeCodeGen
17#include <llvm/PassRegistry.h>                     // for PassRegistry
18#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
19#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
20#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
21#include <llvm/Target/TargetOptions.h>             // for TargetOptions
22#include <llvm/Transforms/Scalar.h>
23#include <llvm/Transforms/Utils/Local.h>
24#include <object_cache.h>
25#include <kernels/pipeline.h>
26#ifdef CUDA_ENABLED
27#include <IR_Gen/llvm2ptx.h>
28#endif
29 
30using namespace llvm;
31
32namespace codegen {
33
34static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
35
36static cl::bits<DebugFlags>
37DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
38#if LLVM_VERSION_MINOR > 6
39                        clEnumVal(ShowASM, "Print assembly code."),
40#endif
41                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
42                        clEnumValEnd), cl::cat(CodeGenOptions));
43
44static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
45#if LLVM_VERSION_MINOR > 6
46static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
47static cl::opt<bool> AsmVerbose("asm-verbose",
48                                cl::desc("Add comments to directives."),
49                                cl::init(true), cl::cat(CodeGenOptions));
50#endif
51
52char OptLevel;
53static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
54                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
55
56
57static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(false), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
58
59static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
60
61
62int BlockSize;
63int SegmentSize;
64int BufferSegments;
65int ThreadNum;
66bool EnableAsserts;
67#ifndef NDEBUG
68#define DEFAULT_TO_TRUE_IN_DEBUG_MODE true
69#else
70#define DEFAULT_TO_TRUE_IN_DEBUG_MODE false
71#endif
72
73static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
74static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
75static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
76static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
77static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(DEFAULT_TO_TRUE_IN_DEBUG_MODE));
78
79const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
80
81bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
82
83static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
84   
85static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
86   
87
88   
89#ifdef CUDA_ENABLED
90bool NVPTX;
91int GroupNum;
92static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
93static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
94#endif
95
96}
97
98
99#ifdef CUDA_ENABLED
100void setNVPTXOption(){
101    codegen::NVPTX = codegen::USENVPTX;
102}
103
104void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
105    InitializeAllTargets();
106    InitializeAllTargetMCs();
107    InitializeAllAsmPrinters();
108    InitializeAllAsmParsers();
109
110    PassRegistry *Registry = PassRegistry::getPassRegistry();
111    initializeCore(*Registry);
112    initializeCodeGen(*Registry);
113    initializeLoopStrengthReducePass(*Registry);
114    initializeLowerIntrinsicsPass(*Registry);
115    initializeUnreachableBlockElimPass(*Registry);
116
117    std::error_code error;
118    raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
119    m->print(out, nullptr);
120
121    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
122            m->dump();
123
124    llvm2ptx(IRFilename, PTXFilename);
125}
126#endif
127
128void printParabixVersion () {
129    raw_ostream &OS = outs();
130    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
131}
132
133void AddParabixVersionPrinter() {
134    cl::AddExtraVersionPrinter(&printParabixVersion);
135}
136
137
138
139void setAllFeatures(EngineBuilder &builder) {
140    StringMap<bool> HostCPUFeatures;
141    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
142        std::vector<std::string> attrs;
143        for (auto &flag : HostCPUFeatures) {
144            auto enabled = flag.second ? "+" : "-";
145            attrs.push_back(enabled + flag.first().str());
146        }
147        builder.setMAttrs(attrs);
148    }
149}
150
151bool AVX2_available() {
152    StringMap<bool> HostCPUFeatures;
153    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
154        auto f = HostCPUFeatures.find("avx2");
155        return ((f != HostCPUFeatures.end()) && f->second);
156    }
157    return false;
158}
159
160#ifndef USE_LLVM_3_6
161void WriteAssembly (TargetMachine *TM, Module * m) {
162    legacy::PassManager PM;
163
164    SmallString<128> Str;
165    raw_svector_ostream dest(Str);
166
167    if (TM->addPassesToEmitFile(PM, dest, TargetMachine::CGFT_AssemblyFile ) ) {
168        throw std::runtime_error("LLVM error: addPassesToEmitFile failed.");
169    }
170    PM.run(*m);
171
172    if (codegen::ASMOutputFilename.empty()) {
173        errs() << Str;
174    } else {
175        std::error_code error;
176        raw_fd_ostream out(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
177        out << Str;
178    }
179}
180#endif
181
182ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
183
184    // Use the pass manager to optimize the function.
185    #ifndef NDEBUG
186    try {
187    #endif
188    legacy::PassManager PM;
189    #ifndef NDEBUG
190    PM.add(createVerifierPass());
191    #endif
192    PM.add(createReassociatePass());             //Reassociate expressions.
193    PM.add(createGVNPass());                     //Eliminate common subexpressions.
194    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
195    PM.add(createCFGSimplificationPass());   
196    PM.run(*m);
197    #ifndef NDEBUG
198    } catch (...) { m->dump(); throw; }
199    #endif
200    InitializeNativeTarget();
201    InitializeNativeTargetAsmPrinter();
202    InitializeNativeTargetAsmParser();
203
204    PassRegistry * Registry = PassRegistry::getPassRegistry();
205    initializeCore(*Registry);
206    initializeCodeGen(*Registry);
207    initializeLowerIntrinsicsPass(*Registry);
208
209    std::string errMessage;
210    EngineBuilder builder{std::unique_ptr<Module>(m)};
211    builder.setErrorStr(&errMessage);
212    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
213    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
214
215    builder.setTargetOptions(opts);
216    builder.setVerifyModules(true);
217    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
218    switch (codegen::OptLevel) {
219        case '0': optLevel = CodeGenOpt::None; break;
220        case '1': optLevel = CodeGenOpt::Less; break;
221        case '2': optLevel = CodeGenOpt::Default; break;
222        case '3': optLevel = CodeGenOpt::Aggressive; break;
223        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
224    }
225    builder.setOptLevel(optLevel);
226
227    setAllFeatures(builder);
228
229    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
230        if (codegen::IROutputFilename.empty()) {
231            m->dump();
232        } else {
233            std::error_code error;
234            raw_fd_ostream out(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
235            m->print(out, nullptr);
236        }
237    }
238#if LLVM_VERSION_MINOR > 6
239    if (codegen::DebugOptionIsSet(codegen::ShowASM)) {
240        WriteAssembly(builder.selectTarget(), m);
241    }
242#endif
243    ExecutionEngine * engine = builder.create();
244    if (engine == nullptr) {
245        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
246    }
247    return engine;
248}
249
250void ApplyObjectCache(ExecutionEngine * e) {
251    ICGrepObjectCache * cache = nullptr;
252    if (codegen::EnableObjectCache) {
253        if (codegen::ObjectCacheDir.empty())
254            // Default is $HOME/.cache/icgrep
255            cache = new ICGrepObjectCache();
256        else
257            cache = new ICGrepObjectCache(codegen::ObjectCacheDir);
258        e->setObjectCache(cache);
259    }
260}
261
262void generatePipeline(IDISA::IDISA_Builder * iBuilder, const std::vector<kernel::KernelBuilder *> & kernels) {
263    if (codegen::pipelineParallel) {
264        generateParallelPipeline(iBuilder, kernels);
265    } else if (codegen::segmentPipelineParallel) {
266        generateSegmentParallelPipeline(iBuilder, kernels);
267    } else {
268        codegen::ThreadNum = 1;
269        generatePipelineLoop(iBuilder, kernels);
270    }
271}
272
273
Note: See TracBrowser for help on using the repository browser.