source: icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp @ 5454

Last change on this file since 5454 was 5454, checked in by nmedfort, 2 years ago

Bug fix check-in for DumpTrace and for compilation of the DoBlock / DoFinalBlock functions. Pablo CodeMotionPass optimized and enabled by default.

File size: 18.1 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <IR_Gen/idisa_target.h>
9#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
10#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
11#include <llvm/Support/CommandLine.h>              // for OptionCategory
12#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
13#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
15#include <llvm/IR/IRPrintingPasses.h>
16#include <llvm/InitializePasses.h>                 // for initializeCodeGen
17#include <llvm/PassRegistry.h>                     // for PassRegistry
18#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
19#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
20#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
21#include <llvm/Target/TargetOptions.h>             // for TargetOptions
22#include <llvm/Transforms/Scalar.h>
23#include <llvm/Transforms/Utils/Local.h>
24#include <llvm/IR/Module.h>
25#include <toolchain/object_cache.h>
26#include <toolchain/pipeline.h>
27#include <kernels/kernel_builder.h>
28#include <kernels/kernel.h>
29#include <sys/stat.h>
30#include <llvm/IR/Verifier.h>
31//#include <toolchain/workqueue.h>
32
33
34using namespace llvm;
35using namespace parabix;
36
37using Kernel = kernel::Kernel;
38using KernelBuilder = kernel::KernelBuilder;
39
40#ifndef NDEBUG
41#define IN_DEBUG_MODE true
42#else
43#define IN_DEBUG_MODE false
44#endif
45
46namespace codegen {
47
48static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
49
50static cl::bits<DebugFlags>
51DebugOptions(cl::values(clEnumVal(ShowUnoptimizedIR, "Print generated LLVM IR."),
52                        clEnumVal(ShowIR, "Print optimized LLVM IR."),
53                        clEnumVal(VerifyIR, "Run the IR verification pass."),
54#ifndef USE_LLVM_3_6
55                        clEnumVal(ShowASM, "Print assembly code."),
56#endif
57                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
58                        clEnumValEnd), cl::cat(CodeGenOptions));
59
60static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
61#ifndef USE_LLVM_3_6
62static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
63static cl::opt<bool> AsmVerbose("asm-verbose",
64                                cl::desc("Add comments to directives."),
65                                cl::init(true), cl::cat(CodeGenOptions));
66#endif
67
68char OptLevel;
69static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
70                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
71
72
73static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
74
75static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
76
77
78int BlockSize;
79int SegmentSize;
80int BufferSegments;
81int ThreadNum;
82bool EnableAsserts;
83bool EnableCycleCounter;
84
85static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
86static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
87static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
88static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
89static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(IN_DEBUG_MODE));
90static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::desc("Count and report CPU cycles per kernel"), cl::init(false), cl::cat(CodeGenOptions));
91
92const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
93
94bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
95
96static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
97   
98static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
99
100}
101
102void printParabixVersion () {
103    raw_ostream &OS = outs();
104    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
105}
106
107void AddParabixVersionPrinter() {
108    cl::AddExtraVersionPrinter(&printParabixVersion);
109}
110
111void setAllFeatures(EngineBuilder &builder) {
112    StringMap<bool> HostCPUFeatures;
113    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
114        std::vector<std::string> attrs;
115        for (auto &flag : HostCPUFeatures) {
116            auto enabled = flag.second ? "+" : "-";
117            attrs.push_back(enabled + flag.first().str());
118        }
119        builder.setMAttrs(attrs);
120    }
121}
122
123bool AVX2_available() {
124    StringMap<bool> HostCPUFeatures;
125    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
126        auto f = HostCPUFeatures.find("avx2");
127        return ((f != HostCPUFeatures.end()) && f->second);
128    }
129    return false;
130}
131
132ParabixDriver::ParabixDriver(std::string && moduleName)
133: mContext(new llvm::LLVMContext())
134, mMainModule(new Module(moduleName, *mContext))
135, iBuilder(nullptr)
136, mTarget(nullptr)
137, mEngine(nullptr)
138, mCache(nullptr) {
139
140    InitializeNativeTarget();
141    InitializeNativeTargetAsmPrinter();
142    InitializeNativeTargetAsmParser();
143
144    PassRegistry * Registry = PassRegistry::getPassRegistry();
145    initializeCore(*Registry);
146    initializeCodeGen(*Registry);
147    initializeLowerIntrinsicsPass(*Registry);
148
149    std::string errMessage;
150    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
151    builder.setUseOrcMCJITReplacement(true);
152    builder.setErrorStr(&errMessage);
153    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
154    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
155    builder.setTargetOptions(opts);
156    builder.setVerifyModules(false);
157    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
158    switch (codegen::OptLevel) {
159        case '0': optLevel = CodeGenOpt::None; break;
160        case '1': optLevel = CodeGenOpt::Less; break;
161        case '2': optLevel = CodeGenOpt::Default; break;
162        case '3': optLevel = CodeGenOpt::Aggressive; break;
163        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
164    }
165    builder.setOptLevel(optLevel);
166    setAllFeatures(builder);
167    mEngine = builder.create();
168    if (mEngine == nullptr) {
169        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
170    }
171    mTarget = builder.selectTarget();
172    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
173        if (codegen::ObjectCacheDir.empty()) {
174            mCache = new ParabixObjectCache();
175        } else {
176            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
177        }
178        mEngine->setObjectCache(mCache);
179    }
180
181    mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple());
182
183    iBuilder.reset(IDISA::GetIDISA_Builder(*mContext, mMainModule->getTargetTriple()));
184    iBuilder->setDriver(this);
185    iBuilder->setModule(mMainModule);
186}
187
188ExternalBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalBuffer> b) {
189    mOwnedBuffers.emplace_back(std::move(b));
190    return cast<ExternalBuffer>(mOwnedBuffers.back().get());
191}
192
193StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
194    b->allocateBuffer(iBuilder);
195    mOwnedBuffers.emplace_back(std::move(b));
196    return mOwnedBuffers.back().get();
197}
198
199Kernel * ParabixDriver::addKernelInstance(std::unique_ptr<Kernel> kb) {
200    mOwnedKernels.emplace_back(std::move(kb));
201    return mOwnedKernels.back().get();
202}
203
204void ParabixDriver::addKernelCall(Kernel & kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
205    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
206    mPipeline.emplace_back(&kb);
207    kb.bindPorts(inputs, outputs);
208    kb.makeModule(iBuilder);
209}
210
211void ParabixDriver::makeKernelCall(Kernel * kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
212    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
213    mPipeline.emplace_back(kb);   
214    kb->bindPorts(inputs, outputs);
215    kb->makeModule(iBuilder);
216}
217
218void ParabixDriver::generatePipelineIR() {
219    #ifndef NDEBUG
220    if (LLVM_UNLIKELY(mPipeline.empty())) {
221        report_fatal_error("Pipeline cannot be empty");
222    } else {
223        for (auto i = mPipeline.begin(); i != mPipeline.end(); ++i) {
224            for (auto j = i; ++j != mPipeline.end(); ) {
225                if (LLVM_UNLIKELY(*i == *j)) {
226                    report_fatal_error("Kernel instances cannot occur twice in the pipeline");
227                }
228            }
229        }
230    }
231    #endif
232    // note: instantiation of all kernels must occur prior to initialization
233    for (const auto & k : mPipeline) {
234        k->addKernelDeclarations(iBuilder);
235    }
236    for (const auto & k : mPipeline) {
237        k->createInstance(iBuilder);
238    }
239    for (const auto & k : mPipeline) {
240        k->initializeInstance(iBuilder);
241    }
242    if (codegen::pipelineParallel) {
243        generateParallelPipeline(iBuilder, mPipeline);
244    } else if (codegen::segmentPipelineParallel) {
245        generateSegmentParallelPipeline(iBuilder, mPipeline);
246    } else {
247        codegen::ThreadNum = 1;
248        generatePipelineLoop(iBuilder, mPipeline);
249    }
250    for (const auto & k : mPipeline) {
251        k->finalizeInstance(iBuilder);
252    }
253}
254
255Function * ParabixDriver::LinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
256    assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr));
257    Function * f = cast<Function>(mod->getOrInsertFunction(name, type));
258    mEngine->addGlobalMapping(f, functionPtr);
259    return f;
260}
261
262void ParabixDriver::linkAndFinalize() {
263
264    legacy::PassManager PM;
265    std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
266    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowUnoptimizedIR))) {
267        if (codegen::IROutputFilename.empty()) {
268            IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
269        } else {
270            std::error_code error;
271            IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
272        }
273        PM.add(createPrintModulePass(*IROutputStream));
274    }
275
276    if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
277        PM.add(createVerifierPass());
278    }
279    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
280    PM.add(createReassociatePass());             //Reassociate expressions.
281    PM.add(createGVNPass());                     //Eliminate common subexpressions.
282    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
283    PM.add(createCFGSimplificationPass());
284    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
285        if (LLVM_LIKELY(IROutputStream == nullptr)) {
286            if (codegen::IROutputFilename.empty()) {
287                IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
288            } else {
289                std::error_code error;
290                IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
291            }
292        }
293        PM.add(createPrintModulePass(*IROutputStream));
294    }
295
296    #ifndef USE_LLVM_3_6
297    std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
298    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
299        if (codegen::ASMOutputFilename.empty()) {
300            ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
301        } else {
302            std::error_code error;
303            ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
304        }
305        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
306            report_fatal_error("LLVM error: could not add emit assembly pass");
307        }
308    }
309    #endif
310
311    Module * module = nullptr;
312
313    try {
314
315        for (Kernel * const kernel : mPipeline) {
316            iBuilder->setKernel(kernel);
317            module = kernel->getModule();
318            bool uncachedObject = true;
319            if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
320                uncachedObject = false;
321            }
322            if (uncachedObject) {
323                module->setTargetTriple(mMainModule->getTargetTriple());
324                kernel->generateKernel(iBuilder);
325                PM.run(*module);
326            }
327            mEngine->addModule(std::unique_ptr<Module>(module));
328            mEngine->generateCodeForModule(module);
329        }
330
331        iBuilder->setKernel(nullptr);
332        module = mMainModule;
333        PM.run(*mMainModule);
334
335        mEngine->finalizeObject();
336
337    } catch (const std::exception & e) {
338        report_fatal_error(e.what());
339    }
340
341}
342
343
344//void ParabixDriver::linkAndFinalize() {
345
346//    legacy::PassManager PM;
347//    if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
348//        PM.add(createVerifierPass());
349//    }
350//    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
351//    PM.add(createReassociatePass());             //Reassociate expressions.
352//    PM.add(createGVNPass());                     //Eliminate common subexpressions.
353//    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
354//    PM.add(createCFGSimplificationPass());
355
356//    unsigned threadCount = 2; //std::thread::hardware_concurrency();
357
358//    std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
359//    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
360//        threadCount = 1; // If we're dumping IR, disable separate compilation
361//        if (codegen::IROutputFilename.empty()) {
362//            IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
363//        } else {
364//            std::error_code error;
365//            IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
366//        }
367//        PM.add(createPrintModulePass(*IROutputStream));
368//    }
369
370//    #ifndef USE_LLVM_3_6
371//    std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
372//    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
373//        threadCount = 1; // If we're dumping ASM, disable separate compilation
374//        if (codegen::ASMOutputFilename.empty()) {
375//            ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
376//        } else {
377//            std::error_code error;
378//            ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
379//        }
380//        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
381//            report_fatal_error("LLVM error: could not add emit assembly pass");
382//        }
383//    }
384//    #endif
385
386//    Module * module = mMainModule;
387//    WorkQueue<Module *> Q(mPipeline.size());
388//    std::thread compilation_thread[threadCount - 1];
389
390//    try {
391
392//        for (unsigned i = 0; i < (threadCount - 1); ++i) {
393//            compilation_thread[i] = std::thread([this, &Q]{
394
395//                InitializeNativeTarget();
396
397//                Module * module = nullptr;
398//                while (Q.pop(module)) {
399//                    mEngine->addModule(std::unique_ptr<Module>(module));
400//                    mEngine->generateCodeForModule(module);
401//                }
402//            });
403//        }
404
405//        module = mMainModule;
406//        iBuilder->setKernel(nullptr);
407//        PM.run(*mMainModule);
408//        Q.push(mMainModule);
409
410//        for (Kernel * const kernel : mPipeline) {
411//            iBuilder->setKernel(kernel);
412//            module = kernel->getModule();
413//            bool uncachedObject = true;
414//            if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
415//                uncachedObject = false;
416//            }
417//            if (uncachedObject) {
418//                module->setTargetTriple(mMainModule->getTargetTriple());
419//                kernel->generateKernel(iBuilder);
420//                PM.run(*module);
421//            }
422//            Q.push(module);
423//        }
424
425//        for (;;) {
426//            if (Q.empty()) {
427//                break;
428//            } else if (Q.try_pop(module)) {
429//                mEngine->addModule(std::unique_ptr<Module>(module));
430//                mEngine->generateCodeForModule(module);
431//            }
432//        }
433
434//        Q.notify_all();
435//        for (unsigned i = 0; i < (threadCount - 1); ++i) {
436//            compilation_thread[i].join();
437//        }
438
439//        assert (Q.empty());
440
441//        mEngine->finalizeObject();
442
443//    } catch (const std::exception & e) {
444//        module->dump();
445//        report_fatal_error(e.what());
446//    }
447
448//}
449
450const std::unique_ptr<KernelBuilder> & ParabixDriver::getBuilder() {
451    return iBuilder;
452}
453
454void * ParabixDriver::getPointerToMain() {
455    return mEngine->getPointerToNamedFunction("Main");
456}
457
458ParabixDriver::~ParabixDriver() {
459    delete mCache;
460}
Note: See TracBrowser for help on using the repository browser.