source: icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp @ 5458

Last change on this file since 5458 was 5458, checked in by lindanl, 2 years ago

Add NVPTX driver.

File size: 18.6 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <IR_Gen/idisa_target.h>
9#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
10#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
11#include <llvm/Support/CommandLine.h>              // for OptionCategory
12#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
13#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
15#include <llvm/IR/IRPrintingPasses.h>
16#include <llvm/InitializePasses.h>                 // for initializeCodeGen
17#include <llvm/PassRegistry.h>                     // for PassRegistry
18#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
19#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
20#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
21#include <llvm/Target/TargetOptions.h>             // for TargetOptions
22#include <llvm/Transforms/Scalar.h>
23#include <llvm/Transforms/Utils/Local.h>
24#include <llvm/IR/Module.h>
25#include <toolchain/object_cache.h>
26#include <toolchain/pipeline.h>
27#include <kernels/kernel_builder.h>
28#include <kernels/kernel.h>
29#include <sys/stat.h>
30#include <llvm/IR/Verifier.h>
31#include <toolchain/NVPTXDriver.cpp>
32//#include <toolchain/workqueue.h>
33
34
35using namespace llvm;
36using namespace parabix;
37
38using Kernel = kernel::Kernel;
39using KernelBuilder = kernel::KernelBuilder;
40
// IN_DEBUG_MODE is a compile-time boolean literal: `true` when NDEBUG is not
// defined (assert-enabled builds) and `false` otherwise.
#ifdef NDEBUG
#define IN_DEBUG_MODE false
#else
#define IN_DEBUG_MODE true
#endif
46
47namespace codegen {
48
49static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
50
51static cl::bits<DebugFlags>
52DebugOptions(cl::values(clEnumVal(ShowUnoptimizedIR, "Print generated LLVM IR."),
53                        clEnumVal(ShowIR, "Print optimized LLVM IR."),
54                        clEnumVal(VerifyIR, "Run the IR verification pass."),
55#ifndef USE_LLVM_3_6
56                        clEnumVal(ShowASM, "Print assembly code."),
57#endif
58                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
59                        clEnumValEnd), cl::cat(CodeGenOptions));
60
61static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
62#ifndef USE_LLVM_3_6
63static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
64static cl::opt<bool> AsmVerbose("asm-verbose",
65                                cl::desc("Add comments to directives."),
66                                cl::init(true), cl::cat(CodeGenOptions));
67#endif
68
69char OptLevel;
70static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
71                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
72
73
74static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
75
76static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
77
78
79int BlockSize;
80int SegmentSize;
81int BufferSegments;
82int ThreadNum;
83bool EnableAsserts;
84bool EnableCycleCounter;
85
86static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
87static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
88static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
89static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
90static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(IN_DEBUG_MODE));
91static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::desc("Count and report CPU cycles per kernel"), cl::init(false), cl::cat(CodeGenOptions));
92
93const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
94
95bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
96
97static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
98   
99static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
100
101bool NVPTX; 
102int GroupNum; 
103static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false)); 
104static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256)); 
105
106}
107
108void setNVPTXOption(){
109    codegen::NVPTX = codegen::USENVPTX; 
110    if(codegen::NVPTX){
111#ifndef CUDA_ENABLED
112    std::cerr << "CUDA compiler is not supported.\n";
113    exit(-1);
114#endif
115    }
116}
117
118void printParabixVersion () {
119    raw_ostream &OS = outs();
120    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
121}
122
123void AddParabixVersionPrinter() {
124    cl::AddExtraVersionPrinter(&printParabixVersion);
125}
126
127void setAllFeatures(EngineBuilder &builder) {
128    StringMap<bool> HostCPUFeatures;
129    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
130        std::vector<std::string> attrs;
131        for (auto &flag : HostCPUFeatures) {
132            auto enabled = flag.second ? "+" : "-";
133            attrs.push_back(enabled + flag.first().str());
134        }
135        builder.setMAttrs(attrs);
136    }
137}
138
139bool AVX2_available() {
140    StringMap<bool> HostCPUFeatures;
141    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
142        auto f = HostCPUFeatures.find("avx2");
143        return ((f != HostCPUFeatures.end()) && f->second);
144    }
145    return false;
146}
147
148ParabixDriver::ParabixDriver(std::string && moduleName)
149: mContext(new llvm::LLVMContext())
150, mMainModule(new Module(moduleName, *mContext))
151, iBuilder(nullptr)
152, mTarget(nullptr)
153, mEngine(nullptr)
154, mCache(nullptr) {
155
156    InitializeNativeTarget();
157    InitializeNativeTargetAsmPrinter();
158    InitializeNativeTargetAsmParser();
159
160    PassRegistry * Registry = PassRegistry::getPassRegistry();
161    initializeCore(*Registry);
162    initializeCodeGen(*Registry);
163    initializeLowerIntrinsicsPass(*Registry);
164
165    std::string errMessage;
166    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
167    builder.setUseOrcMCJITReplacement(true);
168    builder.setErrorStr(&errMessage);
169    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
170    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
171    builder.setTargetOptions(opts);
172    builder.setVerifyModules(false);
173    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
174    switch (codegen::OptLevel) {
175        case '0': optLevel = CodeGenOpt::None; break;
176        case '1': optLevel = CodeGenOpt::Less; break;
177        case '2': optLevel = CodeGenOpt::Default; break;
178        case '3': optLevel = CodeGenOpt::Aggressive; break;
179        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
180    }
181    builder.setOptLevel(optLevel);
182    setAllFeatures(builder);
183    mEngine = builder.create();
184    if (mEngine == nullptr) {
185        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
186    }
187    mTarget = builder.selectTarget();
188    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
189        if (codegen::ObjectCacheDir.empty()) {
190            mCache = new ParabixObjectCache();
191        } else {
192            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
193        }
194        mEngine->setObjectCache(mCache);
195    }
196
197    mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple());
198
199    iBuilder.reset(IDISA::GetIDISA_Builder(*mContext, mMainModule->getTargetTriple()));
200    iBuilder->setDriver(this);
201    iBuilder->setModule(mMainModule);
202}
203
204ExternalBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalBuffer> b) {
205    mOwnedBuffers.emplace_back(std::move(b));
206    return cast<ExternalBuffer>(mOwnedBuffers.back().get());
207}
208
209StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
210    b->allocateBuffer(iBuilder);
211    mOwnedBuffers.emplace_back(std::move(b));
212    return mOwnedBuffers.back().get();
213}
214
215Kernel * ParabixDriver::addKernelInstance(std::unique_ptr<Kernel> kb) {
216    mOwnedKernels.emplace_back(std::move(kb));
217    return mOwnedKernels.back().get();
218}
219
220void ParabixDriver::addKernelCall(Kernel & kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
221    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
222    mPipeline.emplace_back(&kb);
223    kb.bindPorts(inputs, outputs);
224    kb.makeModule(iBuilder);
225}
226
227void ParabixDriver::makeKernelCall(Kernel * kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
228    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
229    mPipeline.emplace_back(kb);   
230    kb->bindPorts(inputs, outputs);
231    kb->makeModule(iBuilder);
232}
233
234void ParabixDriver::generatePipelineIR() {
235    #ifndef NDEBUG
236    if (LLVM_UNLIKELY(mPipeline.empty())) {
237        report_fatal_error("Pipeline cannot be empty");
238    } else {
239        for (auto i = mPipeline.begin(); i != mPipeline.end(); ++i) {
240            for (auto j = i; ++j != mPipeline.end(); ) {
241                if (LLVM_UNLIKELY(*i == *j)) {
242                    report_fatal_error("Kernel instances cannot occur twice in the pipeline");
243                }
244            }
245        }
246    }
247    #endif
248    // note: instantiation of all kernels must occur prior to initialization
249    for (const auto & k : mPipeline) {
250        k->addKernelDeclarations(iBuilder);
251    }
252    for (const auto & k : mPipeline) {
253        k->createInstance(iBuilder);
254    }
255    for (const auto & k : mPipeline) {
256        k->initializeInstance(iBuilder);
257    }
258    if (codegen::pipelineParallel) {
259        generateParallelPipeline(iBuilder, mPipeline);
260    } else if (codegen::segmentPipelineParallel) {
261        generateSegmentParallelPipeline(iBuilder, mPipeline);
262    } else {
263        codegen::ThreadNum = 1;
264        generatePipelineLoop(iBuilder, mPipeline);
265    }
266    for (const auto & k : mPipeline) {
267        k->finalizeInstance(iBuilder);
268    }
269}
270
271Function * ParabixDriver::LinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
272    assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr));
273    Function * f = cast<Function>(mod->getOrInsertFunction(name, type));
274    mEngine->addGlobalMapping(f, functionPtr);
275    return f;
276}
277
278void ParabixDriver::linkAndFinalize() {
279
280    legacy::PassManager PM;
281    std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
282    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowUnoptimizedIR))) {
283        if (codegen::IROutputFilename.empty()) {
284            IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
285        } else {
286            std::error_code error;
287            IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
288        }
289        PM.add(createPrintModulePass(*IROutputStream));
290    }
291
292    if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
293        PM.add(createVerifierPass());
294    }
295    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
296    PM.add(createReassociatePass());             //Reassociate expressions.
297    PM.add(createGVNPass());                     //Eliminate common subexpressions.
298    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
299    PM.add(createCFGSimplificationPass());
300    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
301        if (LLVM_LIKELY(IROutputStream == nullptr)) {
302            if (codegen::IROutputFilename.empty()) {
303                IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
304            } else {
305                std::error_code error;
306                IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
307            }
308        }
309        PM.add(createPrintModulePass(*IROutputStream));
310    }
311
312    #ifndef USE_LLVM_3_6
313    std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
314    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
315        if (codegen::ASMOutputFilename.empty()) {
316            ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
317        } else {
318            std::error_code error;
319            ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
320        }
321        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
322            report_fatal_error("LLVM error: could not add emit assembly pass");
323        }
324    }
325    #endif
326
327    Module * module = nullptr;
328
329    try {
330
331        for (Kernel * const kernel : mPipeline) {
332            iBuilder->setKernel(kernel);
333            module = kernel->getModule();
334            bool uncachedObject = true;
335            if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
336                uncachedObject = false;
337            }
338            if (uncachedObject) {
339                module->setTargetTriple(mMainModule->getTargetTriple());
340                kernel->generateKernel(iBuilder);
341                PM.run(*module);
342            }
343            mEngine->addModule(std::unique_ptr<Module>(module));
344            mEngine->generateCodeForModule(module);
345        }
346
347        iBuilder->setKernel(nullptr);
348        module = mMainModule;
349        PM.run(*mMainModule);
350
351        mEngine->finalizeObject();
352
353    } catch (const std::exception & e) {
354        report_fatal_error(e.what());
355    }
356
357}
358
359
360//void ParabixDriver::linkAndFinalize() {
361
362//    legacy::PassManager PM;
363//    if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
364//        PM.add(createVerifierPass());
365//    }
366//    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
367//    PM.add(createReassociatePass());             //Reassociate expressions.
368//    PM.add(createGVNPass());                     //Eliminate common subexpressions.
369//    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
370//    PM.add(createCFGSimplificationPass());
371
372//    unsigned threadCount = 2; //std::thread::hardware_concurrency();
373
374//    std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
375//    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
376//        threadCount = 1; // If we're dumping IR, disable separate compilation
377//        if (codegen::IROutputFilename.empty()) {
378//            IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
379//        } else {
380//            std::error_code error;
381//            IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
382//        }
383//        PM.add(createPrintModulePass(*IROutputStream));
384//    }
385
386//    #ifndef USE_LLVM_3_6
387//    std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
388//    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
389//        threadCount = 1; // If we're dumping ASM, disable separate compilation
390//        if (codegen::ASMOutputFilename.empty()) {
391//            ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
392//        } else {
393//            std::error_code error;
394//            ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
395//        }
396//        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
397//            report_fatal_error("LLVM error: could not add emit assembly pass");
398//        }
399//    }
400//    #endif
401
402//    Module * module = mMainModule;
403//    WorkQueue<Module *> Q(mPipeline.size());
404//    std::thread compilation_thread[threadCount - 1];
405
406//    try {
407
408//        for (unsigned i = 0; i < (threadCount - 1); ++i) {
409//            compilation_thread[i] = std::thread([this, &Q]{
410
411//                InitializeNativeTarget();
412
413//                Module * module = nullptr;
414//                while (Q.pop(module)) {
415//                    mEngine->addModule(std::unique_ptr<Module>(module));
416//                    mEngine->generateCodeForModule(module);
417//                }
418//            });
419//        }
420
421//        module = mMainModule;
422//        iBuilder->setKernel(nullptr);
423//        PM.run(*mMainModule);
424//        Q.push(mMainModule);
425
426//        for (Kernel * const kernel : mPipeline) {
427//            iBuilder->setKernel(kernel);
428//            module = kernel->getModule();
429//            bool uncachedObject = true;
430//            if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
431//                uncachedObject = false;
432//            }
433//            if (uncachedObject) {
434//                module->setTargetTriple(mMainModule->getTargetTriple());
435//                kernel->generateKernel(iBuilder);
436//                PM.run(*module);
437//            }
438//            Q.push(module);
439//        }
440
441//        for (;;) {
442//            if (Q.empty()) {
443//                break;
444//            } else if (Q.try_pop(module)) {
445//                mEngine->addModule(std::unique_ptr<Module>(module));
446//                mEngine->generateCodeForModule(module);
447//            }
448//        }
449
450//        Q.notify_all();
451//        for (unsigned i = 0; i < (threadCount - 1); ++i) {
452//            compilation_thread[i].join();
453//        }
454
455//        assert (Q.empty());
456
457//        mEngine->finalizeObject();
458
459//    } catch (const std::exception & e) {
460//        module->dump();
461//        report_fatal_error(e.what());
462//    }
463
464//}
465
466const std::unique_ptr<KernelBuilder> & ParabixDriver::getBuilder() {
467    return iBuilder;
468}
469
470void * ParabixDriver::getPointerToMain() {
471    return mEngine->getPointerToNamedFunction("Main");
472}
473
474ParabixDriver::~ParabixDriver() {
475    delete mCache;
476}
Note: See TracBrowser for help on using the repository browser.