source: icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp @ 5425

Last change on this file since 5425 was 5425, checked in by nmedfort, 2 years ago

Changes towards separate compilation

File size: 14.4 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <IR_Gen/idisa_target.h>
9#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
10#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
11#include <llvm/Support/CommandLine.h>              // for OptionCategory
12#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
13#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
15#include <llvm/IR/IRPrintingPasses.h>
16#include <llvm/InitializePasses.h>                 // for initializeCodeGen
17#ifndef NDEBUG
18#include <llvm/IR/Verifier.h>
19#include <boost/container/flat_set.hpp>
20#endif
21#include <llvm/PassRegistry.h>                     // for PassRegistry
22#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
23#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
24#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
25#include <llvm/Target/TargetOptions.h>             // for TargetOptions
26#include <llvm/Transforms/Scalar.h>
27#include <llvm/Transforms/Utils/Local.h>
28#include <llvm/IR/Module.h>
29#include <toolchain/object_cache.h>
30#include <toolchain/pipeline.h>
31#include <kernels/kernel.h>
32#include <sys/stat.h>
33
34using namespace llvm;
35using namespace parabix;
36
37namespace codegen {
38
39static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
40
41static cl::bits<DebugFlags>
42DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
43#ifndef USE_LLVM_3_6
44                        clEnumVal(ShowASM, "Print assembly code."),
45#endif
46                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
47                        clEnumValEnd), cl::cat(CodeGenOptions));
48
49static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
50#ifndef USE_LLVM_3_6
51static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
52static cl::opt<bool> AsmVerbose("asm-verbose",
53                                cl::desc("Add comments to directives."),
54                                cl::init(true), cl::cat(CodeGenOptions));
55#endif
56
57char OptLevel;
58static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
59                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
60
61
62static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
63
64static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
65
66
67int BlockSize;
68int SegmentSize;
69int BufferSegments;
70int ThreadNum;
71bool EnableAsserts;
72bool EnableCycleCounter;
73#ifndef NDEBUG
74#define IN_DEBUG_MODE true
75#else
76#define IN_DEBUG_MODE false
77#endif
78
79static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
80static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
81static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
82static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
83static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(IN_DEBUG_MODE));
84static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::desc("Count and report CPU cycles per kernel"), cl::init(false), cl::cat(CodeGenOptions));
85
86const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
87
88bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
89
90static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
91   
92static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
93   
94#ifdef CUDA_ENABLED
95bool NVPTX;
96int GroupNum;
97static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
98static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
99#endif
100
101}
102
103#ifdef CUDA_ENABLED
104void setNVPTXOption(){
105    codegen::NVPTX = codegen::USENVPTX;
106}
107
108void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
109    InitializeAllTargets();
110    InitializeAllTargetMCs();
111    InitializeAllAsmPrinters();
112    InitializeAllAsmParsers();
113
114    PassRegistry *Registry = PassRegistry::getPassRegistry();
115    initializeCore(*Registry);
116    initializeCodeGen(*Registry);
117    initializeLoopStrengthReducePass(*Registry);
118    initializeLowerIntrinsicsPass(*Registry);
119    initializeUnreachableBlockElimPass(*Registry);
120
121    std::error_code error;
122    raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
123    m->print(out, nullptr);
124
125    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
126            m->dump();
127
128    llvm2ptx(IRFilename, PTXFilename);
129}
130#endif
131
132void printParabixVersion () {
133    raw_ostream &OS = outs();
134    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
135}
136
137void AddParabixVersionPrinter() {
138    cl::AddExtraVersionPrinter(&printParabixVersion);
139}
140
141void setAllFeatures(EngineBuilder &builder) {
142    StringMap<bool> HostCPUFeatures;
143    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
144        std::vector<std::string> attrs;
145        for (auto &flag : HostCPUFeatures) {
146            auto enabled = flag.second ? "+" : "-";
147            attrs.push_back(enabled + flag.first().str());
148        }
149        builder.setMAttrs(attrs);
150    }
151}
152
153bool AVX2_available() {
154    StringMap<bool> HostCPUFeatures;
155    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
156        auto f = HostCPUFeatures.find("avx2");
157        return ((f != HostCPUFeatures.end()) && f->second);
158    }
159    return false;
160}
161
162ParabixDriver::ParabixDriver(std::string && moduleName)
163: mContext(new llvm::LLVMContext())
164, mMainModule(new Module(moduleName, *mContext))
165, iBuilder(nullptr)
166, mTarget(nullptr)
167, mEngine(nullptr)
168, mCache(nullptr) {
169
170    InitializeNativeTarget();
171    InitializeNativeTargetAsmPrinter();
172    InitializeNativeTargetAsmParser();
173
174    PassRegistry * Registry = PassRegistry::getPassRegistry();
175    initializeCore(*Registry);
176    initializeCodeGen(*Registry);
177    initializeLowerIntrinsicsPass(*Registry);
178
179    std::string errMessage;
180    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
181    builder.setErrorStr(&errMessage);
182    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
183    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
184
185    builder.setTargetOptions(opts);
186    builder.setVerifyModules(IN_DEBUG_MODE);
187    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
188    switch (codegen::OptLevel) {
189        case '0': optLevel = CodeGenOpt::None; break;
190        case '1': optLevel = CodeGenOpt::Less; break;
191        case '2': optLevel = CodeGenOpt::Default; break;
192        case '3': optLevel = CodeGenOpt::Aggressive; break;
193        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
194    }
195    builder.setOptLevel(optLevel);
196
197    setAllFeatures(builder);
198    mEngine = builder.create();
199    if (mEngine == nullptr) {
200        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
201    }
202    mTarget = builder.selectTarget();
203    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
204        if (codegen::ObjectCacheDir.empty()) {
205            mCache = new ParabixObjectCache();
206        } else {
207            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
208        }
209        assert (mCache);
210        mEngine->setObjectCache(mCache);
211    }
212
213    mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple());
214
215    iBuilder.reset(IDISA::GetIDISA_Builder(mMainModule));
216    iBuilder->setDriver(this);
217}
218
219ExternalFileBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalFileBuffer> b, Value * externalBuf) {
220    mOwnedBuffers.push_back(std::move(b));
221    ExternalFileBuffer * rawBuf = cast<ExternalFileBuffer>(mOwnedBuffers.back().get());
222    rawBuf->setStreamSetBuffer(externalBuf);
223    return rawBuf;
224}
225
226StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
227    b->allocateBuffer();
228    mOwnedBuffers.push_back(std::move(b));
229    return mOwnedBuffers.back().get();
230}
231
232kernel::KernelBuilder * ParabixDriver::addKernelInstance(std::unique_ptr<kernel::KernelBuilder> kb) {
233    mOwnedKernels.push_back(std::move(kb));
234    return mOwnedKernels.back().get();
235}
236
237void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
238    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
239    mPipeline.push_back(&kb);
240    kb.createKernelStub(inputs, outputs);
241}
242
243void ParabixDriver::makeKernelCall(kernel::KernelBuilder * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
244    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
245    mPipeline.push_back(kb);
246    kb->createKernelStub(inputs, outputs);
247}
248
249void ParabixDriver::generatePipelineIR() {
250    #ifndef NDEBUG
251    if (LLVM_UNLIKELY(mPipeline.empty())) {
252        report_fatal_error("Pipeline must contain at least one kernel");
253    } else {
254        boost::container::flat_set<kernel::KernelBuilder *> K(mPipeline.begin(), mPipeline.end());
255        if (LLVM_UNLIKELY(K.size() != mPipeline.size())) {
256            report_fatal_error("Kernel definitions can only occur once in the pipeline");
257        }
258    }
259    #endif
260
261    // note: instantiation of all kernels must occur prior to initialization
262    for (const auto & k : mPipeline) {
263        k->addKernelDeclarations(mMainModule);
264    }
265    for (const auto & k : mPipeline) {
266        k->createInstance();
267    }
268    for (const auto & k : mPipeline) {
269        k->initializeInstance();
270    }
271    if (codegen::pipelineParallel) {
272        generateParallelPipeline(iBuilder, mPipeline);
273    } else if (codegen::segmentPipelineParallel) {
274        generateSegmentParallelPipeline(iBuilder, mPipeline);
275    } else {
276        codegen::ThreadNum = 1;
277        generatePipelineLoop(iBuilder, mPipeline);
278    }
279    for (const auto & k : mPipeline) {
280        k->finalizeInstance();
281    }
282}
283
284Function * ParabixDriver::LinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
285    assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr));
286    Function * f = cast<Function>(mod->getOrInsertFunction(name, type));
287    mEngine->addGlobalMapping(f, functionPtr);
288    return f;
289}
290
291void ParabixDriver::linkAndFinalize() {
292    Module * m = mMainModule;
293    #ifndef NDEBUG
294    try {
295    #endif
296    legacy::PassManager PM;
297    #ifndef NDEBUG
298    PM.add(createVerifierPass());
299    #endif
300    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
301    PM.add(createReassociatePass());             //Reassociate expressions.
302    PM.add(createGVNPass());                     //Eliminate common subexpressions.
303    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
304    PM.add(createCFGSimplificationPass());
305
306    raw_fd_ostream * IROutputStream = nullptr;
307    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
308        if (codegen::IROutputFilename.empty()) {
309            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
310        } else {
311            std::error_code error;
312            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
313        }
314        PM.add(createPrintModulePass(*IROutputStream));
315    }
316
317    #ifndef USE_LLVM_3_6
318    raw_fd_ostream * ASMOutputStream = nullptr;
319    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
320        if (codegen::ASMOutputFilename.empty()) {
321            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
322        } else {
323            std::error_code error;
324            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
325        }
326        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
327            report_fatal_error("LLVM error: could not add emit assembly pass");
328        }
329    }
330    #endif
331
332    PM.run(*m);
333    for (kernel::KernelBuilder * const kb : mPipeline) {
334        m = kb->getModule();
335        bool uncachedObject = true;
336        if (mCache) {
337            const std::string moduleID = m->getModuleIdentifier();
338            const std::string signature = kb->generateKernelSignature(moduleID);
339            if (mCache->loadCachedObjectFile(moduleID, signature)) {
340                uncachedObject = false;
341            }
342        }
343        if (uncachedObject) {
344            Module * const cm = iBuilder->getModule();
345            iBuilder->setModule(m);
346            kb->generateKernel();
347            PM.run(*m);
348            iBuilder->setModule(cm);
349        }       
350        mEngine->addModule(std::unique_ptr<Module>(m));
351    }   
352    mEngine->finalizeObject();
353
354    delete IROutputStream;
355    #ifndef USE_LLVM_3_6
356    delete ASMOutputStream;
357    #endif
358    #ifndef NDEBUG
359    } catch (...) { m->dump(); throw; }
360    #endif
361}
362
363void * ParabixDriver::getPointerToMain() {
364    return mEngine->getPointerToNamedFunction("Main");
365}
366
367ParabixDriver::~ParabixDriver() {
368    delete mCache;
369}
Note: See TracBrowser for help on using the repository browser.