source: icGREP/icgrep-devel/icgrep/toolchain.cpp @ 5341

Last change on this file since 5341 was 5314, checked in by lindanl, 2 years ago

Extend icgrep to use multiple groups of threads on the GPU.

File size: 8.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
9#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
10#include <llvm/Support/CommandLine.h>              // for OptionCategory
11#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
12#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
13#include <llvm/ADT/SmallString.h>                  // for SmallString
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
15#include <llvm/InitializePasses.h>                 // for initializeCodeGen
16#include <llvm/PassRegistry.h>                     // for PassRegistry
17#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
18#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
19#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
20#include <llvm/Target/TargetOptions.h>             // for TargetOptions
21#include <object_cache.h>
22namespace llvm { class Module; }
23#ifdef CUDA_ENABLED
24#include <IR_Gen/llvm2ptx.h>
25#endif
26 
27using namespace llvm;
28
29namespace codegen {
30
31static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
32static cl::bits<DebugFlags>
33DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
34#if LLVM_VERSION_MINOR > 6
35                        clEnumVal(ShowASM, "Print assembly code."),
36#endif
37                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
38                        clEnumValEnd), cl::cat(CodeGenOptions));
39
40static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
41#if LLVM_VERSION_MINOR > 6
42static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
43static cl::opt<bool> AsmVerbose("asm-verbose",
44                                cl::desc("Add comments to directives."),
45                                cl::init(true), cl::cat(CodeGenOptions));
46#endif
47
48char OptLevel;
49static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
50                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
51
52
53static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(false), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
54
55static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
56
57
58int BlockSize;
59int SegmentSize;
60int BufferSegments;
61int ThreadNum;
62
63static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
64static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
65static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
66static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
67
68const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
69
70bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
71
72   
73#ifdef CUDA_ENABLED
74bool NVPTX;
75int GroupNum;
76static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
77static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
78#endif
79
80}
81
82
83#ifdef CUDA_ENABLED
84void setNVPTXOption(){
85    codegen::NVPTX = codegen::USENVPTX;
86}
87
88void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
89    InitializeAllTargets();
90    InitializeAllTargetMCs();
91    InitializeAllAsmPrinters();
92    InitializeAllAsmParsers();
93
94    PassRegistry *Registry = PassRegistry::getPassRegistry();
95    initializeCore(*Registry);
96    initializeCodeGen(*Registry);
97    initializeLoopStrengthReducePass(*Registry);
98    initializeLowerIntrinsicsPass(*Registry);
99    initializeUnreachableBlockElimPass(*Registry);
100
101    std::error_code error;
102    llvm::raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
103    m->print(out, nullptr);
104
105    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
106            m->dump();
107
108    llvm2ptx(IRFilename, PTXFilename);
109}
110#endif
111
112
113void setAllFeatures(EngineBuilder &builder) {
114    llvm::StringMap<bool> HostCPUFeatures;
115    if (llvm::sys::getHostCPUFeatures(HostCPUFeatures)) {
116        std::vector<std::string> attrs;
117        for (auto &flag : HostCPUFeatures) {
118            auto enabled = flag.second ? "+" : "-";
119            attrs.push_back(enabled + flag.first().str());
120        }
121        builder.setMAttrs(attrs);
122    }
123}
124
125bool AVX2_available() {
126    llvm::StringMap<bool> HostCPUFeatures;
127    if (llvm::sys::getHostCPUFeatures(HostCPUFeatures)) {
128        auto f = HostCPUFeatures.find("avx2");
129        return ((f != HostCPUFeatures.end()) && f->second);
130    }
131    return false;
132}
133
134#ifndef USE_LLVM_3_6
135void WriteAssembly (llvm::TargetMachine *TM, Module * m) {
136    llvm::legacy::PassManager PM;
137
138    llvm::SmallString<128> Str;
139    llvm::raw_svector_ostream dest(Str);
140
141    if (TM->addPassesToEmitFile(PM, dest, llvm::TargetMachine::CGFT_AssemblyFile ) ) {
142        throw std::runtime_error("LLVM error: addPassesToEmitFile failed.");
143    }
144    PM.run(*m);
145
146    if (codegen::ASMOutputFilename.empty()) {
147        errs() << Str;
148    } else {
149        std::error_code error;
150        llvm::raw_fd_ostream out(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
151        out << Str;
152    }
153}
154#endif
155
156ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
157
158    InitializeNativeTarget();
159    InitializeNativeTargetAsmPrinter();
160    InitializeNativeTargetAsmParser();
161
162    PassRegistry * Registry = PassRegistry::getPassRegistry();
163    initializeCore(*Registry);
164    initializeCodeGen(*Registry);
165    initializeLowerIntrinsicsPass(*Registry);
166
167    std::string errMessage;
168    EngineBuilder builder{std::unique_ptr<Module>(m)};
169    builder.setErrorStr(&errMessage);
170    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
171    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
172
173    builder.setTargetOptions(opts);
174    builder.setVerifyModules(true);
175    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
176    switch (codegen::OptLevel) {
177        case '0': optLevel = CodeGenOpt::None; break;
178        case '1': optLevel = CodeGenOpt::Less; break;
179        case '2': optLevel = CodeGenOpt::Default; break;
180        case '3': optLevel = CodeGenOpt::Aggressive; break;
181        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
182    }
183    builder.setOptLevel(optLevel);
184
185    setAllFeatures(builder);
186
187    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
188        if (codegen::IROutputFilename.empty()) {
189            m->dump();
190        } else {
191            std::error_code error;
192            llvm::raw_fd_ostream out(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
193            m->print(out, nullptr);
194        }
195    }
196#if LLVM_VERSION_MINOR > 6
197    if (codegen::DebugOptionIsSet(codegen::ShowASM)) {
198      WriteAssembly(builder.selectTarget(), m);
199    }
200#endif
201    ExecutionEngine * engine = builder.create();
202    if (engine == nullptr) {
203        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
204    }   
205    return engine;
206}
207
208void ApplyObjectCache(ExecutionEngine * e) {
209    ICGrepObjectCache * cache = nullptr;
210    if (codegen::EnableObjectCache) {
211        if (codegen::ObjectCacheDir.empty())
212            // Default is $HOME/.cache/icgrep
213            cache = new ICGrepObjectCache();
214        else
215            cache = new ICGrepObjectCache(codegen::ObjectCacheDir);
216        e->setObjectCache(cache);
217    }   
218}
219
220
Note: See TracBrowser for help on using the repository browser.