source: icGREP/icgrep-devel/icgrep/toolchain.cpp @ 5401

Last change on this file since 5401 was 5401, checked in by nmedfort, 3 years ago

Updated all projects to use ParabixDriver. Deprecated original pipeline generation methods. Enabled LLVM optimizations, IR and ASM printing for Kernel modules. Enabled object cache by default. Began work on moving consumed position information back to producing kernels.

File size: 12.2 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
9#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
10#include <llvm/Support/CommandLine.h>              // for OptionCategory
11#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
12#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
13#include <llvm/Support/FormattedStream.h>
14#include <llvm/ADT/SmallString.h>                  // for SmallString
15#include <llvm/IR/LegacyPassManager.h>             // for PassManager
16#include <llvm/IR/IRPrintingPasses.h>
17#include <llvm/IR/Verifier.h>
18#include <llvm/InitializePasses.h>                 // for initializeCodeGen
19#include <llvm/PassRegistry.h>                     // for PassRegistry
20#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
21#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
22#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
23#include <llvm/Target/TargetOptions.h>             // for TargetOptions
24#include <llvm/Transforms/Scalar.h>
25#include <llvm/Transforms/Utils/Local.h>
26#include <llvm/IR/Module.h>
27#include <object_cache.h>
28#include <kernels/pipeline.h>
29#include <kernels/interface.h>
30#include <kernels/kernel.h>
31#ifdef CUDA_ENABLED
32#include <IR_Gen/llvm2ptx.h>
33#endif
34 
35
36
37using namespace llvm;
38
39namespace codegen {
40
41static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
42
43static cl::bits<DebugFlags>
44DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
45#ifndef USE_LLVM_3_6
46                        clEnumVal(ShowASM, "Print assembly code."),
47#endif
48                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
49                        clEnumValEnd), cl::cat(CodeGenOptions));
50
51static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
52#ifndef USE_LLVM_3_6
53static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
54static cl::opt<bool> AsmVerbose("asm-verbose",
55                                cl::desc("Add comments to directives."),
56                                cl::init(true), cl::cat(CodeGenOptions));
57#endif
58
59char OptLevel;
60static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
61                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
62
63
64static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
65
66static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
67
68
69int BlockSize;
70int SegmentSize;
71int BufferSegments;
72int ThreadNum;
73bool EnableAsserts;
74#ifndef NDEBUG
75#define DEFAULT_TO_TRUE_IN_DEBUG_MODE true
76#else
77#define DEFAULT_TO_TRUE_IN_DEBUG_MODE false
78#endif
79
80static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
81static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
82static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
83static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
84static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(DEFAULT_TO_TRUE_IN_DEBUG_MODE));
85
86const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
87
88bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
89
90static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
91   
92static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
93   
94#ifdef CUDA_ENABLED
95bool NVPTX;
96int GroupNum;
97static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
98static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
99#endif
100
101}
102
103#ifdef CUDA_ENABLED
104void setNVPTXOption(){
105    codegen::NVPTX = codegen::USENVPTX;
106}
107
108void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
109    InitializeAllTargets();
110    InitializeAllTargetMCs();
111    InitializeAllAsmPrinters();
112    InitializeAllAsmParsers();
113
114    PassRegistry *Registry = PassRegistry::getPassRegistry();
115    initializeCore(*Registry);
116    initializeCodeGen(*Registry);
117    initializeLoopStrengthReducePass(*Registry);
118    initializeLowerIntrinsicsPass(*Registry);
119    initializeUnreachableBlockElimPass(*Registry);
120
121    std::error_code error;
122    raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
123    m->print(out, nullptr);
124
125    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
126            m->dump();
127
128    llvm2ptx(IRFilename, PTXFilename);
129}
130#endif
131
132void printParabixVersion () {
133    raw_ostream &OS = outs();
134    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
135}
136
137void AddParabixVersionPrinter() {
138    cl::AddExtraVersionPrinter(&printParabixVersion);
139}
140
141void setAllFeatures(EngineBuilder &builder) {
142    StringMap<bool> HostCPUFeatures;
143    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
144        std::vector<std::string> attrs;
145        for (auto &flag : HostCPUFeatures) {
146            auto enabled = flag.second ? "+" : "-";
147            attrs.push_back(enabled + flag.first().str());
148        }
149        builder.setMAttrs(attrs);
150    }
151}
152
153bool AVX2_available() {
154    StringMap<bool> HostCPUFeatures;
155    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
156        auto f = HostCPUFeatures.find("avx2");
157        return ((f != HostCPUFeatures.end()) && f->second);
158    }
159    return false;
160}
161
162ParabixDriver::ParabixDriver(IDISA::IDISA_Builder * iBuilder)
163: iBuilder(iBuilder)
164, mMainModule(iBuilder->getModule())
165, mTarget(nullptr)
166, mEngine(nullptr)
167{
168    InitializeNativeTarget();
169    InitializeNativeTargetAsmPrinter();
170    InitializeNativeTargetAsmParser();
171
172    PassRegistry * Registry = PassRegistry::getPassRegistry();
173    initializeCore(*Registry);
174    initializeCodeGen(*Registry);
175    initializeLowerIntrinsicsPass(*Registry);
176
177    std::string errMessage;
178    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
179    builder.setErrorStr(&errMessage);
180    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
181    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
182
183    builder.setTargetOptions(opts);
184    builder.setVerifyModules(true);
185    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
186    switch (codegen::OptLevel) {
187        case '0': optLevel = CodeGenOpt::None; break;
188        case '1': optLevel = CodeGenOpt::Less; break;
189        case '2': optLevel = CodeGenOpt::Default; break;
190        case '3': optLevel = CodeGenOpt::Aggressive; break;
191        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
192    }
193    builder.setOptLevel(optLevel);
194
195    setAllFeatures(builder);
196
197    mEngine = builder.create();
198    if (mEngine == nullptr) {
199        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
200    }
201    mTarget = builder.selectTarget();
202    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
203        if (codegen::ObjectCacheDir.empty()) {
204            mCache = llvm::make_unique<ParabixObjectCache>();
205        } else {
206            mCache = llvm::make_unique<ParabixObjectCache>(codegen::ObjectCacheDir);
207        }
208        assert (mCache);
209        mEngine->setObjectCache(mCache.get());
210    }
211}
212
213void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
214    assert (mModuleMap.count(&kb) == 0);
215    mKernelList.push_back(&kb);
216    mModuleMap.emplace(&kb, kb.createKernelStub(inputs, outputs));
217}
218
219void ParabixDriver::generatePipelineIR() {
220    for (kernel::KernelBuilder * kb : mKernelList) {
221        kb->addKernelDeclarations(mMainModule);
222    }
223    if (codegen::pipelineParallel) {
224        generateParallelPipeline(iBuilder, mKernelList);
225    } else if (codegen::segmentPipelineParallel) {
226        generateSegmentParallelPipeline(iBuilder, mKernelList);
227    } else {
228        codegen::ThreadNum = 1;
229        generatePipelineLoop(iBuilder, mKernelList);
230    }
231}
232
233void ParabixDriver::addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, FunctionType *type, void * functionPtr) const {
234    const auto f = mModuleMap.find(&kb);
235    assert ("addKernelCall(kb, ...) must be called before addExternalLink(kb, ...)" && f != mModuleMap.end());
236    mEngine->addGlobalMapping(cast<Function>(f->second->getOrInsertFunction(name, type)), functionPtr);
237}
238
239void ParabixDriver::linkAndFinalize() {
240    Module * m = mMainModule;
241    #ifndef NDEBUG
242    try {
243    #endif
244    legacy::PassManager PM;
245    #ifndef NDEBUG
246    PM.add(createVerifierPass());
247    #endif
248    PM.add(createReassociatePass());             //Reassociate expressions.
249    PM.add(createGVNPass());                     //Eliminate common subexpressions.
250    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
251    PM.add(createCFGSimplificationPass());
252
253    raw_fd_ostream * IROutputStream = nullptr;
254    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
255        if (codegen::IROutputFilename.empty()) {
256            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
257        } else {
258            std::error_code error;
259            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
260        }
261        PM.add(createPrintModulePass(*IROutputStream));
262    }
263
264    #ifndef USE_LLVM_3_6
265    raw_fd_ostream * ASMOutputStream = nullptr;
266    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
267        if (codegen::ASMOutputFilename.empty()) {
268            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
269        } else {
270            std::error_code error;
271            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
272        }
273        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
274            report_fatal_error("LLVM error: could not add emit assembly pass");
275        }
276    }
277    #endif
278
279    PM.run(*m);
280    for (auto pair : mModuleMap) {
281        kernel::KernelBuilder * const kb = std::get<0>(pair);
282        m = std::get<1>(pair);
283        bool uncachedObject = true;
284        if (mCache) {
285            const std::string moduleID = m->getModuleIdentifier();
286            const std::string signature = kb->generateKernelSignature(moduleID);
287            if (mCache->loadCachedObjectFile(moduleID, signature)) {
288                uncachedObject = false;
289            }
290        }
291        if (uncachedObject) {
292            Module * const cm = iBuilder->getModule();
293            iBuilder->setModule(m);
294            kb->generateKernel();
295            PM.run(*m);
296            iBuilder->setModule(cm);
297        }       
298        mEngine->addModule(std::unique_ptr<Module>(m));
299    }   
300    mEngine->finalizeObject();
301
302    delete IROutputStream;
303    #ifndef USE_LLVM_3_6
304    delete ASMOutputStream;
305    #endif
306    #ifndef NDEBUG
307    } catch (...) { m->dump(); throw; }
308    #endif
309    mModuleMap.clear();
310}
311
312void * ParabixDriver::getPointerToMain() {
313    return mEngine->getPointerToNamedFunction("Main");
314}
Note: See TracBrowser for help on using the repository browser.