source: icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp @ 5435

Last change on this file since 5435 was 5435, checked in by nmedfort, 2 years ago

Continued refactoring work.

File size: 17.0 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <IR_Gen/idisa_target.h>
9#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
10#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
11#include <llvm/Support/CommandLine.h>              // for OptionCategory
12#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
13#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
15#include <llvm/IR/IRPrintingPasses.h>
16#include <llvm/InitializePasses.h>                 // for initializeCodeGen
17#ifndef NDEBUG
18#include <llvm/IR/Verifier.h>
19#include <boost/container/flat_set.hpp>
20#endif
21#include <llvm/PassRegistry.h>                     // for PassRegistry
22#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
23#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
24#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
25#include <llvm/Target/TargetOptions.h>             // for TargetOptions
26#include <llvm/Transforms/Scalar.h>
27#include <llvm/Transforms/Utils/Local.h>
28#include <llvm/IR/Module.h>
29#include <toolchain/object_cache.h>
30#include <toolchain/pipeline.h>
31#include <kernels/kernel.h>
32#include <sys/stat.h>
33#include <thread>
34#include <boost/lockfree/queue.hpp>
35
36using namespace llvm;
37using namespace parabix;
38
39namespace codegen {
40
41static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
42
43static cl::bits<DebugFlags>
44DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
45#ifndef USE_LLVM_3_6
46                        clEnumVal(ShowASM, "Print assembly code."),
47#endif
48                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
49                        clEnumValEnd), cl::cat(CodeGenOptions));
50
51static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
52#ifndef USE_LLVM_3_6
53static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
54static cl::opt<bool> AsmVerbose("asm-verbose",
55                                cl::desc("Add comments to directives."),
56                                cl::init(true), cl::cat(CodeGenOptions));
57#endif
58
59char OptLevel;
60static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
61                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
62
63
64static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
65
66static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
67
68
69int BlockSize;
70int SegmentSize;
71int BufferSegments;
72int ThreadNum;
73bool EnableAsserts;
74bool EnableCycleCounter;
75#ifndef NDEBUG
76#define IN_DEBUG_MODE true
77#else
78#define IN_DEBUG_MODE false
79#endif
80
81static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
82static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
83static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
84static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
85static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(IN_DEBUG_MODE));
86static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::desc("Count and report CPU cycles per kernel"), cl::init(false), cl::cat(CodeGenOptions));
87
88const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
89
90bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
91
92static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
93   
94static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
95
96}
97
98void printParabixVersion () {
99    raw_ostream &OS = outs();
100    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
101}
102
103void AddParabixVersionPrinter() {
104    cl::AddExtraVersionPrinter(&printParabixVersion);
105}
106
107void setAllFeatures(EngineBuilder &builder) {
108    StringMap<bool> HostCPUFeatures;
109    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
110        std::vector<std::string> attrs;
111        for (auto &flag : HostCPUFeatures) {
112            auto enabled = flag.second ? "+" : "-";
113            attrs.push_back(enabled + flag.first().str());
114        }
115        builder.setMAttrs(attrs);
116    }
117}
118
119bool AVX2_available() {
120    StringMap<bool> HostCPUFeatures;
121    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
122        auto f = HostCPUFeatures.find("avx2");
123        return ((f != HostCPUFeatures.end()) && f->second);
124    }
125    return false;
126}
127
128ParabixDriver::ParabixDriver(std::string && moduleName)
129: mContext(new llvm::LLVMContext())
130, mMainModule(new Module(moduleName, *mContext))
131, iBuilder(nullptr)
132, mTarget(nullptr)
133, mEngine(nullptr)
134, mCache(nullptr) {
135
136    InitializeNativeTarget();
137    InitializeNativeTargetAsmPrinter();
138    InitializeNativeTargetAsmParser();
139
140    PassRegistry * Registry = PassRegistry::getPassRegistry();
141    initializeCore(*Registry);
142    initializeCodeGen(*Registry);
143    initializeLowerIntrinsicsPass(*Registry);
144
145    std::string errMessage;
146    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
147    builder.setErrorStr(&errMessage);
148    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
149    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
150
151    builder.setTargetOptions(opts);
152    builder.setVerifyModules(IN_DEBUG_MODE);
153    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
154    switch (codegen::OptLevel) {
155        case '0': optLevel = CodeGenOpt::None; break;
156        case '1': optLevel = CodeGenOpt::Less; break;
157        case '2': optLevel = CodeGenOpt::Default; break;
158        case '3': optLevel = CodeGenOpt::Aggressive; break;
159        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
160    }
161    builder.setOptLevel(optLevel);
162
163    setAllFeatures(builder);
164    mEngine = builder.create();
165    if (mEngine == nullptr) {
166        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
167    }
168    mTarget = builder.selectTarget();
169    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
170        if (codegen::ObjectCacheDir.empty()) {
171            mCache = new ParabixObjectCache();
172        } else {
173            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
174        }
175        assert (mCache);
176        mEngine->setObjectCache(mCache);
177    }
178
179    mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple());
180
181    iBuilder.reset(IDISA::GetIDISA_Builder(mMainModule));
182    iBuilder->setDriver(this);
183}
184
185ExternalBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalBuffer> b) {
186    mOwnedBuffers.emplace_back(std::move(b));
187    return cast<ExternalBuffer>(mOwnedBuffers.back().get());
188}
189
190StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
191    b->allocateBuffer(iBuilder);
192    mOwnedBuffers.emplace_back(std::move(b));
193    return mOwnedBuffers.back().get();
194}
195
196kernel::Kernel * ParabixDriver::addKernelInstance(std::unique_ptr<kernel::Kernel> kb) {
197    mOwnedKernels.emplace_back(std::move(kb));
198    return mOwnedKernels.back().get();
199}
200
201void ParabixDriver::addKernelCall(kernel::Kernel & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
202    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
203    mPipeline.emplace_back(&kb);
204    kb.setBuilder(iBuilder.get());
205    kb.createKernelStub(inputs, outputs);
206}
207
208void ParabixDriver::makeKernelCall(kernel::Kernel * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
209    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
210    mPipeline.emplace_back(kb);
211    kb->setBuilder(iBuilder.get());
212    kb->createKernelStub(inputs, outputs);
213}
214
215void ParabixDriver::generatePipelineIR() {
216    #ifndef NDEBUG
217    if (LLVM_UNLIKELY(mPipeline.empty())) {
218        report_fatal_error("Pipeline must contain at least one kernel");
219    } else {
220        boost::container::flat_set<kernel::Kernel *> K(mPipeline.begin(), mPipeline.end());
221        if (LLVM_UNLIKELY(K.size() != mPipeline.size())) {
222            report_fatal_error("Kernel definitions can only occur once in the pipeline");
223        }
224    }
225    #endif
226
227    // note: instantiation of all kernels must occur prior to initialization
228    for (const auto & k : mPipeline) {
229        k->setBuilder(iBuilder.get());
230        k->addKernelDeclarations();
231    }
232    for (const auto & k : mPipeline) {
233        k->setBuilder(iBuilder.get());
234        k->createInstance();
235    }
236    for (const auto & k : mPipeline) {
237        k->setBuilder(iBuilder.get());
238        k->initializeInstance();
239    }
240    if (codegen::pipelineParallel) {
241        generateParallelPipeline(iBuilder.get(), mPipeline);
242    } else if (codegen::segmentPipelineParallel) {
243        generateSegmentParallelPipeline(iBuilder.get(), mPipeline);
244    } else {
245        codegen::ThreadNum = 1;
246        generatePipelineLoop(iBuilder.get(), mPipeline);
247    }
248    for (const auto & k : mPipeline) {
249        k->setBuilder(iBuilder.get());
250        k->finalizeInstance();
251    }
252}
253
254Function * ParabixDriver::LinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
255    assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr));
256    Function * f = cast<Function>(mod->getOrInsertFunction(name, type));
257    mEngine->addGlobalMapping(f, functionPtr);
258    return f;
259}
260
261void ParabixDriver::linkAndFinalize() {
262    Module * m = mMainModule;
263    #ifndef NDEBUG
264    try {
265    #endif
266    legacy::PassManager PM;
267    #ifndef NDEBUG
268    PM.add(createVerifierPass());
269    #endif
270    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
271    PM.add(createReassociatePass());             //Reassociate expressions.
272    PM.add(createGVNPass());                     //Eliminate common subexpressions.
273    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
274    PM.add(createCFGSimplificationPass());
275
276    raw_fd_ostream * IROutputStream = nullptr;
277    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
278        if (codegen::IROutputFilename.empty()) {
279            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
280        } else {
281            std::error_code error;
282            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
283        }
284        PM.add(createPrintModulePass(*IROutputStream));
285    }
286
287    #ifndef USE_LLVM_3_6
288    raw_fd_ostream * ASMOutputStream = nullptr;
289    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
290        if (codegen::ASMOutputFilename.empty()) {
291            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
292        } else {
293            std::error_code error;
294            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
295        }
296        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
297            report_fatal_error("LLVM error: could not add emit assembly pass");
298        }
299    }
300    #endif
301
302    PM.run(*m);
303
304    for (kernel::Kernel * const k : mPipeline) {
305        m = k->getModule();
306        bool uncachedObject = true;
307        if (mCache && mCache->loadCachedObjectFile(k)) {
308            uncachedObject = false;
309        }
310        if (uncachedObject) {
311            iBuilder->setModule(m);
312            k->setBuilder(iBuilder.get());
313            k->generateKernel();
314            PM.run(*m);
315        }
316        mEngine->addModule(std::unique_ptr<Module>(m));
317    }   
318    mEngine->finalizeObject();
319
320    iBuilder->setModule(mMainModule);
321
322    delete IROutputStream;
323    #ifndef USE_LLVM_3_6
324    delete ASMOutputStream;
325    #endif
326    #ifndef NDEBUG
327    } catch (...) { m->dump(); throw; }
328    #endif
329}
330
331void * ParabixDriver::getPointerToMain() {
332    return mEngine->getPointerToNamedFunction("Main");
333}
334
335ParabixDriver::~ParabixDriver() {
336    delete mCache;
337}
338
339
340//void ParabixDriver::linkAndFinalize() {
341
342//    using KernelQueue = boost::lockfree::queue<kernel::KernelBuilder *>;
343
344//    legacy::PassManager PM;
345//    #ifndef NDEBUG
346//    PM.add(createVerifierPass());
347//    #endif
348//    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
349//    PM.add(createReassociatePass());             //Reassociate expressions.
350//    PM.add(createGVNPass());                     //Eliminate common subexpressions.
351//    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
352//    PM.add(createCFGSimplificationPass());
353
354//    raw_fd_ostream * IROutputStream = nullptr;
355//    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
356//        if (codegen::IROutputFilename.empty()) {
357//            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
358//        } else {
359//            std::error_code error;
360//            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
361//        }
362//        PM.add(createPrintModulePass(*IROutputStream));
363//        codegen::Jobs = 1; // TODO: set Jobs to 1 for now; these should be updated to pipe to a temporary buffer when Jobs > 1
364//    }
365
366//    #ifndef USE_LLVM_3_6
367//    raw_fd_ostream * ASMOutputStream = nullptr;
368//    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
369//        if (codegen::ASMOutputFilename.empty()) {
370//            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
371//        } else {
372//            std::error_code error;
373//            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
374//        }
375//        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
376//            report_fatal_error("LLVM error: could not add emit assembly pass");
377//        }
378//        codegen::Jobs = 1; // TODO: set Jobs to 1 for now; these should be updated to pipe to a temporary buffer when Jobs > 1
379//    }
380//    #endif
381
382//    KernelQueue Q(mPipeline.size() + 1);
383//    for (kernel::KernelBuilder * kb : mPipeline) {
384//        assert (kb);
385//        Q.unsynchronized_push(kb);
386//    }
387
388//    std::thread compilation_thread[codegen::Jobs];
389//    for (int i = 0; i < codegen::Jobs; ++i) {
390//        compilation_thread[i] = std::thread([&]{
391//            kernel::KernelBuilder * kb = nullptr;
392//            Module * m = nullptr;
393//            try {
394//                while (Q.pop(kb)) {
395//                    m = kb->getModule();
396//                    bool uncachedObject = true;
397//                    if (mCache && mCache->loadCachedObjectFile(kb)) {
398//                        uncachedObject = false;
399//                    }
400//                    if (uncachedObject) {
401//                        Module * const cm = iBuilder->getModule();
402//                        iBuilder->setModule(m);
403//                        kb->generateKernel();
404//                        PM.run(*m);
405//                        iBuilder->setModule(cm);
406//                    }
407//                    mEngine->addModule(std::unique_ptr<Module>(m));
408//                }
409//            } catch (...) {
410//                // clear the queue
411//                while (Q.pop(kb));
412//                // dump the result the module to the console
413//                if (m) m->dump();
414//                throw;
415//            }
416//        });
417//    }
418
419//    PM.run(*mMainModule);
420//    for (int i = 0; i < codegen::Jobs; ++i) {
421//        compilation_thread[i].join();
422//    }
423//    mEngine->finalizeObject();
424
425//    delete IROutputStream;
426//    #ifndef USE_LLVM_3_6
427//    delete ASMOutputStream;
428//    #endif
429
430//}
431
432
433//            std::unique_ptr<IDISA::IDISA_Builder> idb(IDISA::GetIDISA_Builder(kb->getModule()));
434//            idb->setDriver(this);
435//            kb->setBuilder(idb.get());
Note: See TracBrowser for help on using the repository browser.