source: icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp @ 5431

Last change on this file since 5431 was 5431, checked in by nmedfort, 2 years ago

partial refactoring check in with change for Linda.

File size: 13.0 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <IR_Gen/idisa_target.h>
9#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
10#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
11#include <llvm/Support/CommandLine.h>              // for OptionCategory
12#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
13#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
15#include <llvm/IR/IRPrintingPasses.h>
16#include <llvm/InitializePasses.h>                 // for initializeCodeGen
17#ifndef NDEBUG
18#include <llvm/IR/Verifier.h>
19#include <boost/container/flat_set.hpp>
20#endif
21#include <llvm/PassRegistry.h>                     // for PassRegistry
22#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
23#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
24#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
25#include <llvm/Target/TargetOptions.h>             // for TargetOptions
26#include <llvm/Transforms/Scalar.h>
27#include <llvm/Transforms/Utils/Local.h>
28#include <llvm/IR/Module.h>
29#include <toolchain/object_cache.h>
30#include <toolchain/pipeline.h>
31#include <kernels/kernel.h>
32#include <sys/stat.h>
33
34using namespace llvm;
35using namespace parabix;
36
37namespace codegen {
38
39static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
40
41static cl::bits<DebugFlags>
42DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
43#ifndef USE_LLVM_3_6
44                        clEnumVal(ShowASM, "Print assembly code."),
45#endif
46                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
47                        clEnumValEnd), cl::cat(CodeGenOptions));
48
49static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
50#ifndef USE_LLVM_3_6
51static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
52static cl::opt<bool> AsmVerbose("asm-verbose",
53                                cl::desc("Add comments to directives."),
54                                cl::init(true), cl::cat(CodeGenOptions));
55#endif
56
57char OptLevel;
58static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
59                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
60
61
62static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
63
64static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
65
66
67int BlockSize;
68int SegmentSize;
69int BufferSegments;
70int ThreadNum;
71bool EnableAsserts;
72bool EnableCycleCounter;
73#ifndef NDEBUG
74#define IN_DEBUG_MODE true
75#else
76#define IN_DEBUG_MODE false
77#endif
78
79static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
80static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
81static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
82static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
83static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(IN_DEBUG_MODE));
84static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::desc("Count and report CPU cycles per kernel"), cl::init(false), cl::cat(CodeGenOptions));
85
86const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
87
88bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
89
90static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
91   
92static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
93
94}
95
96void printParabixVersion () {
97    raw_ostream &OS = outs();
98    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
99}
100
101void AddParabixVersionPrinter() {
102    cl::AddExtraVersionPrinter(&printParabixVersion);
103}
104
105void setAllFeatures(EngineBuilder &builder) {
106    StringMap<bool> HostCPUFeatures;
107    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
108        std::vector<std::string> attrs;
109        for (auto &flag : HostCPUFeatures) {
110            auto enabled = flag.second ? "+" : "-";
111            attrs.push_back(enabled + flag.first().str());
112        }
113        builder.setMAttrs(attrs);
114    }
115}
116
117bool AVX2_available() {
118    StringMap<bool> HostCPUFeatures;
119    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
120        auto f = HostCPUFeatures.find("avx2");
121        return ((f != HostCPUFeatures.end()) && f->second);
122    }
123    return false;
124}
125
126ParabixDriver::ParabixDriver(std::string && moduleName)
127: mContext(new llvm::LLVMContext())
128, mMainModule(new Module(moduleName, *mContext))
129, iBuilder(nullptr)
130, mTarget(nullptr)
131, mEngine(nullptr)
132, mCache(nullptr) {
133
134    InitializeNativeTarget();
135    InitializeNativeTargetAsmPrinter();
136    InitializeNativeTargetAsmParser();
137
138    PassRegistry * Registry = PassRegistry::getPassRegistry();
139    initializeCore(*Registry);
140    initializeCodeGen(*Registry);
141    initializeLowerIntrinsicsPass(*Registry);
142
143    std::string errMessage;
144    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
145    builder.setErrorStr(&errMessage);
146    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
147    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
148
149    builder.setTargetOptions(opts);
150    builder.setVerifyModules(IN_DEBUG_MODE);
151    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
152    switch (codegen::OptLevel) {
153        case '0': optLevel = CodeGenOpt::None; break;
154        case '1': optLevel = CodeGenOpt::Less; break;
155        case '2': optLevel = CodeGenOpt::Default; break;
156        case '3': optLevel = CodeGenOpt::Aggressive; break;
157        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
158    }
159    builder.setOptLevel(optLevel);
160
161    setAllFeatures(builder);
162    mEngine = builder.create();
163    if (mEngine == nullptr) {
164        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
165    }
166    mTarget = builder.selectTarget();
167    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
168        if (codegen::ObjectCacheDir.empty()) {
169            mCache = new ParabixObjectCache();
170        } else {
171            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
172        }
173        assert (mCache);
174        mEngine->setObjectCache(mCache);
175    }
176
177    mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple());
178
179    iBuilder.reset(IDISA::GetIDISA_Builder(mMainModule));
180    iBuilder->setDriver(this);
181}
182
183ExternalBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalBuffer> b) {
184    mOwnedBuffers.emplace_back(std::move(b));
185    return cast<ExternalBuffer>(mOwnedBuffers.back().get());
186}
187
188StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
189    b->allocateBuffer(iBuilder.get());
190    mOwnedBuffers.emplace_back(std::move(b));
191    return mOwnedBuffers.back().get();
192}
193
194kernel::KernelBuilder * ParabixDriver::addKernelInstance(std::unique_ptr<kernel::KernelBuilder> kb) {
195    kb->setBuilder(iBuilder.get());
196    mOwnedKernels.emplace_back(std::move(kb));
197    return mOwnedKernels.back().get();
198}
199
200void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
201    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
202    mPipeline.emplace_back(&kb);
203    kb.createKernelStub(inputs, outputs);
204}
205
206void ParabixDriver::makeKernelCall(kernel::KernelBuilder * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
207    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
208    mPipeline.emplace_back(kb);
209    kb->createKernelStub(inputs, outputs);
210}
211
212void ParabixDriver::generatePipelineIR() {
213    #ifndef NDEBUG
214    if (LLVM_UNLIKELY(mPipeline.empty())) {
215        report_fatal_error("Pipeline must contain at least one kernel");
216    } else {
217        boost::container::flat_set<kernel::KernelBuilder *> K(mPipeline.begin(), mPipeline.end());
218        if (LLVM_UNLIKELY(K.size() != mPipeline.size())) {
219            report_fatal_error("Kernel definitions can only occur once in the pipeline");
220        }
221    }
222    #endif
223
224    // note: instantiation of all kernels must occur prior to initialization
225    for (const auto & k : mPipeline) {
226        k->addKernelDeclarations();
227    }
228    for (const auto & k : mPipeline) {
229        k->createInstance();
230    }
231    for (const auto & k : mPipeline) {
232        k->initializeInstance();
233    }
234    if (codegen::pipelineParallel) {
235        generateParallelPipeline(iBuilder, mPipeline);
236    } else if (codegen::segmentPipelineParallel) {
237        generateSegmentParallelPipeline(iBuilder, mPipeline);
238    } else {
239        codegen::ThreadNum = 1;
240        generatePipelineLoop(iBuilder, mPipeline);
241    }
242    for (const auto & k : mPipeline) {
243        k->finalizeInstance();
244    }
245}
246
247Function * ParabixDriver::LinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
248    assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr));
249    Function * f = cast<Function>(mod->getOrInsertFunction(name, type));
250    mEngine->addGlobalMapping(f, functionPtr);
251    return f;
252}
253
254void ParabixDriver::linkAndFinalize() {
255    Module * m = mMainModule;
256    #ifndef NDEBUG
257    try {
258    #endif
259    legacy::PassManager PM;
260    #ifndef NDEBUG
261    PM.add(createVerifierPass());
262    #endif
263    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
264    PM.add(createReassociatePass());             //Reassociate expressions.
265    PM.add(createGVNPass());                     //Eliminate common subexpressions.
266    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
267    PM.add(createCFGSimplificationPass());
268
269    raw_fd_ostream * IROutputStream = nullptr;
270    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
271        if (codegen::IROutputFilename.empty()) {
272            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
273        } else {
274            std::error_code error;
275            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
276        }
277        PM.add(createPrintModulePass(*IROutputStream));
278    }
279
280    #ifndef USE_LLVM_3_6
281    raw_fd_ostream * ASMOutputStream = nullptr;
282    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
283        if (codegen::ASMOutputFilename.empty()) {
284            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
285        } else {
286            std::error_code error;
287            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
288        }
289        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
290            report_fatal_error("LLVM error: could not add emit assembly pass");
291        }
292    }
293    #endif
294
295    PM.run(*m);
296
297    for (kernel::KernelBuilder * const kb : mPipeline) {
298        m = kb->getModule();
299        bool uncachedObject = true;
300        if (mCache && mCache->loadCachedObjectFile(kb)) {
301            uncachedObject = false;
302        }
303        if (uncachedObject) {
304            iBuilder->setModule(kb->getModule());
305            kb->generateKernel();
306            PM.run(*m);
307        }
308        mEngine->addModule(std::unique_ptr<Module>(m));
309    }   
310    mEngine->finalizeObject();
311
312    iBuilder->setModule(mMainModule);
313
314    delete IROutputStream;
315    #ifndef USE_LLVM_3_6
316    delete ASMOutputStream;
317    #endif
318    #ifndef NDEBUG
319    } catch (...) { m->dump(); throw; }
320    #endif
321}
322
323void * ParabixDriver::getPointerToMain() {
324    return mEngine->getPointerToNamedFunction("Main");
325}
326
327ParabixDriver::~ParabixDriver() {
328    delete mCache;
329}
Note: See TracBrowser for help on using the repository browser.