source: icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp @ 5436

Last change on this file since 5436 was 5436, checked in by nmedfort, 2 years ago

Continued refactoring work. PabloKernel is now an abstract base type with a 'generatePabloMethod' hook to generate Pablo code.

File size: 17.0 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <IR_Gen/idisa_target.h>
9#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
10#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
11#include <llvm/Support/CommandLine.h>              // for OptionCategory
12#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
13#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
15#include <llvm/IR/IRPrintingPasses.h>
16#include <llvm/InitializePasses.h>                 // for initializeCodeGen
17#ifndef NDEBUG
18#include <llvm/IR/Verifier.h>
19#include <boost/container/flat_set.hpp>
20#endif
21#include <llvm/PassRegistry.h>                     // for PassRegistry
22#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
23#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
24#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
25#include <llvm/Target/TargetOptions.h>             // for TargetOptions
26#include <llvm/Transforms/Scalar.h>
27#include <llvm/Transforms/Utils/Local.h>
28#include <llvm/IR/Module.h>
29#include <toolchain/object_cache.h>
30#include <toolchain/pipeline.h>
31#include <kernels/kernel_builder.h>
32#include <kernels/kernel.h>
33#include <sys/stat.h>
34#include <thread>
35#include <boost/lockfree/queue.hpp>
36
37using namespace llvm;
38using namespace parabix;
39
40namespace codegen {
41
42static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
43
44static cl::bits<DebugFlags>
45DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
46#ifndef USE_LLVM_3_6
47                        clEnumVal(ShowASM, "Print assembly code."),
48#endif
49                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
50                        clEnumValEnd), cl::cat(CodeGenOptions));
51
52static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
53#ifndef USE_LLVM_3_6
54static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
55static cl::opt<bool> AsmVerbose("asm-verbose",
56                                cl::desc("Add comments to directives."),
57                                cl::init(true), cl::cat(CodeGenOptions));
58#endif
59
60char OptLevel;
61static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
62                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
63
64
65static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
66
67static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
68
69
70int BlockSize;
71int SegmentSize;
72int BufferSegments;
73int ThreadNum;
74bool EnableAsserts;
75bool EnableCycleCounter;
76#ifndef NDEBUG
77#define IN_DEBUG_MODE true
78#else
79#define IN_DEBUG_MODE false
80#endif
81
82static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
83static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
84static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
85static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
86static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(IN_DEBUG_MODE));
87static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::desc("Count and report CPU cycles per kernel"), cl::init(false), cl::cat(CodeGenOptions));
88
89const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
90
91bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
92
93static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
94   
95static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
96
97}
98
99void printParabixVersion () {
100    raw_ostream &OS = outs();
101    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
102}
103
104void AddParabixVersionPrinter() {
105    cl::AddExtraVersionPrinter(&printParabixVersion);
106}
107
108void setAllFeatures(EngineBuilder &builder) {
109    StringMap<bool> HostCPUFeatures;
110    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
111        std::vector<std::string> attrs;
112        for (auto &flag : HostCPUFeatures) {
113            auto enabled = flag.second ? "+" : "-";
114            attrs.push_back(enabled + flag.first().str());
115        }
116        builder.setMAttrs(attrs);
117    }
118}
119
120bool AVX2_available() {
121    StringMap<bool> HostCPUFeatures;
122    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
123        auto f = HostCPUFeatures.find("avx2");
124        return ((f != HostCPUFeatures.end()) && f->second);
125    }
126    return false;
127}
128
129ParabixDriver::ParabixDriver(std::string && moduleName)
130: mContext(new llvm::LLVMContext())
131, mMainModule(new Module(moduleName, *mContext))
132, iBuilder(nullptr)
133, mTarget(nullptr)
134, mEngine(nullptr)
135, mCache(nullptr) {
136
137    InitializeNativeTarget();
138    InitializeNativeTargetAsmPrinter();
139    InitializeNativeTargetAsmParser();
140
141    PassRegistry * Registry = PassRegistry::getPassRegistry();
142    initializeCore(*Registry);
143    initializeCodeGen(*Registry);
144    initializeLowerIntrinsicsPass(*Registry);
145
146    std::string errMessage;
147    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
148    builder.setErrorStr(&errMessage);
149    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
150    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
151
152    builder.setTargetOptions(opts);
153    builder.setVerifyModules(IN_DEBUG_MODE);
154    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
155    switch (codegen::OptLevel) {
156        case '0': optLevel = CodeGenOpt::None; break;
157        case '1': optLevel = CodeGenOpt::Less; break;
158        case '2': optLevel = CodeGenOpt::Default; break;
159        case '3': optLevel = CodeGenOpt::Aggressive; break;
160        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
161    }
162    builder.setOptLevel(optLevel);
163
164    setAllFeatures(builder);
165    mEngine = builder.create();
166    if (mEngine == nullptr) {
167        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
168    }
169    mTarget = builder.selectTarget();
170    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
171        if (codegen::ObjectCacheDir.empty()) {
172            mCache = new ParabixObjectCache();
173        } else {
174            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
175        }
176        assert (mCache);
177        mEngine->setObjectCache(mCache);
178    }
179
180    mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple());
181
182    iBuilder.reset(IDISA::GetIDISA_Builder(mMainModule));
183    iBuilder->setDriver(this);
184    iBuilder->setModule(mMainModule);
185}
186
187ExternalBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalBuffer> b) {
188    mOwnedBuffers.emplace_back(std::move(b));
189    return cast<ExternalBuffer>(mOwnedBuffers.back().get());
190}
191
192StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
193    b->allocateBuffer(iBuilder);
194    mOwnedBuffers.emplace_back(std::move(b));
195    return mOwnedBuffers.back().get();
196}
197
198kernel::Kernel * ParabixDriver::addKernelInstance(std::unique_ptr<kernel::Kernel> kb) {
199    mOwnedKernels.emplace_back(std::move(kb));
200    return mOwnedKernels.back().get();
201}
202
203void ParabixDriver::addKernelCall(kernel::Kernel & kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
204    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
205    mPipeline.emplace_back(&kb);
206    assert (mMainModule);
207    kb.setBuilder(iBuilder);
208    kb.createKernelStub(inputs, outputs);
209}
210
211void ParabixDriver::makeKernelCall(kernel::Kernel * kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
212    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
213    mPipeline.emplace_back(kb);   
214    kb->setBuilder(iBuilder);
215    kb->createKernelStub(inputs, outputs);
216}
217
218void ParabixDriver::generatePipelineIR() {
219    #ifndef NDEBUG
220    if (LLVM_UNLIKELY(mPipeline.empty())) {
221        report_fatal_error("Pipeline must contain at least one kernel");
222    } else {
223        boost::container::flat_set<kernel::Kernel *> K(mPipeline.begin(), mPipeline.end());
224        if (LLVM_UNLIKELY(K.size() != mPipeline.size())) {
225            report_fatal_error("Kernel definitions can only occur once in the pipeline");
226        }
227    }
228    #endif
229
230    // note: instantiation of all kernels must occur prior to initialization
231    for (const auto & k : mPipeline) {
232        k->addKernelDeclarations();
233    }
234    for (const auto & k : mPipeline) {
235        k->createInstance();
236    }
237    for (const auto & k : mPipeline) {
238        k->initializeInstance();
239    }
240    if (codegen::pipelineParallel) {
241        generateParallelPipeline(iBuilder, mPipeline);
242    } else if (codegen::segmentPipelineParallel) {
243        generateSegmentParallelPipeline(iBuilder, mPipeline);
244    } else {
245        codegen::ThreadNum = 1;
246        generatePipelineLoop(iBuilder, mPipeline);
247    }
248    for (const auto & k : mPipeline) {
249        k->setBuilder(iBuilder);
250        k->finalizeInstance();
251    }
252}
253
254Function * ParabixDriver::LinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
255    assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr));
256    Function * f = cast<Function>(mod->getOrInsertFunction(name, type));
257    mEngine->addGlobalMapping(f, functionPtr);
258    return f;
259}
260
261void ParabixDriver::linkAndFinalize() {
262    Module * m = mMainModule;
263    #ifndef NDEBUG
264    try {
265    #endif
266    legacy::PassManager PM;
267    #ifndef NDEBUG
268    PM.add(createVerifierPass());
269    #endif
270    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
271    PM.add(createReassociatePass());             //Reassociate expressions.
272    PM.add(createGVNPass());                     //Eliminate common subexpressions.
273    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
274    PM.add(createCFGSimplificationPass());
275
276    raw_fd_ostream * IROutputStream = nullptr;
277    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
278        if (codegen::IROutputFilename.empty()) {
279            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
280        } else {
281            std::error_code error;
282            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
283        }
284        PM.add(createPrintModulePass(*IROutputStream));
285    }
286
287    #ifndef USE_LLVM_3_6
288    raw_fd_ostream * ASMOutputStream = nullptr;
289    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
290        if (codegen::ASMOutputFilename.empty()) {
291            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
292        } else {
293            std::error_code error;
294            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
295        }
296        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
297            report_fatal_error("LLVM error: could not add emit assembly pass");
298        }
299    }
300    #endif
301
302    PM.run(*m);
303
304    for (kernel::Kernel * const k : mPipeline) {
305        m = k->getModule();
306        bool uncachedObject = true;
307        if (mCache && mCache->loadCachedObjectFile(k)) {
308            uncachedObject = false;
309        }
310        if (uncachedObject) {
311            iBuilder->setModule(m);
312            k->setBuilder(iBuilder);
313            k->generateKernel();
314            PM.run(*m);
315        }
316        mEngine->addModule(std::unique_ptr<Module>(m));
317    }   
318    mEngine->finalizeObject();
319
320    iBuilder->setModule(mMainModule);
321
322    delete IROutputStream;
323    #ifndef USE_LLVM_3_6
324    delete ASMOutputStream;
325    #endif
326    #ifndef NDEBUG
327    } catch (...) { m->dump(); throw; }
328    #endif
329}
330
331const std::unique_ptr<kernel::KernelBuilder> & ParabixDriver::getBuilder() {
332    return iBuilder;
333}
334
335void * ParabixDriver::getPointerToMain() {
336    return mEngine->getPointerToNamedFunction("Main");
337}
338
339ParabixDriver::~ParabixDriver() {
340    delete mCache;
341}
342
343
344//void ParabixDriver::linkAndFinalize() {
345
346//    using KernelQueue = boost::lockfree::queue<kernel::KernelBuilder *>;
347
348//    legacy::PassManager PM;
349//    #ifndef NDEBUG
350//    PM.add(createVerifierPass());
351//    #endif
352//    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
353//    PM.add(createReassociatePass());             //Reassociate expressions.
354//    PM.add(createGVNPass());                     //Eliminate common subexpressions.
355//    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
356//    PM.add(createCFGSimplificationPass());
357
358//    raw_fd_ostream * IROutputStream = nullptr;
359//    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
360//        if (codegen::IROutputFilename.empty()) {
361//            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
362//        } else {
363//            std::error_code error;
364//            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
365//        }
366//        PM.add(createPrintModulePass(*IROutputStream));
367//        codegen::Jobs = 1; // TODO: set Jobs to 1 for now; these should be updated to pipe to a temporary buffer when Jobs > 1
368//    }
369
370//    #ifndef USE_LLVM_3_6
371//    raw_fd_ostream * ASMOutputStream = nullptr;
372//    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
373//        if (codegen::ASMOutputFilename.empty()) {
374//            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
375//        } else {
376//            std::error_code error;
377//            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
378//        }
379//        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
380//            report_fatal_error("LLVM error: could not add emit assembly pass");
381//        }
382//        codegen::Jobs = 1; // TODO: set Jobs to 1 for now; these should be updated to pipe to a temporary buffer when Jobs > 1
383//    }
384//    #endif
385
386//    KernelQueue Q(mPipeline.size() + 1);
387//    for (kernel::KernelBuilder * kb : mPipeline) {
388//        assert (kb);
389//        Q.unsynchronized_push(kb);
390//    }
391
392//    std::thread compilation_thread[codegen::Jobs];
393//    for (int i = 0; i < codegen::Jobs; ++i) {
394//        compilation_thread[i] = std::thread([&]{
395//            kernel::KernelBuilder * kb = nullptr;
396//            Module * m = nullptr;
397//            try {
398//                while (Q.pop(kb)) {
399//                    m = kb->getModule();
400//                    bool uncachedObject = true;
401//                    if (mCache && mCache->loadCachedObjectFile(kb)) {
402//                        uncachedObject = false;
403//                    }
404//                    if (uncachedObject) {
405//                        Module * const cm = iBuilder->getModule();
406//                        iBuilder->setModule(m);
407//                        kb->generateKernel();
408//                        PM.run(*m);
409//                        iBuilder->setModule(cm);
410//                    }
411//                    mEngine->addModule(std::unique_ptr<Module>(m));
412//                }
413//            } catch (...) {
414//                // clear the queue
415//                while (Q.pop(kb));
416//                // dump the result the module to the console
417//                if (m) m->dump();
418//                throw;
419//            }
420//        });
421//    }
422
423//    PM.run(*mMainModule);
424//    for (int i = 0; i < codegen::Jobs; ++i) {
425//        compilation_thread[i].join();
426//    }
427//    mEngine->finalizeObject();
428
429//    delete IROutputStream;
430//    #ifndef USE_LLVM_3_6
431//    delete ASMOutputStream;
432//    #endif
433
434//}
435
436
437//            std::unique_ptr<IDISA::IDISA_Builder> idb(IDISA::GetIDISA_Builder(kb->getModule()));
438//            idb->setDriver(this);
439//            kb->setBuilder(idb.get());
Note: See TracBrowser for help on using the repository browser.