source: icGREP/icgrep-devel/icgrep/kernels/toolchain.cpp @ 5409

Last change on this file since 5409 was 5409, checked in by cameron, 2 years ago

Parabix driver can take ownership and allocate buffers

File size: 12.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
9#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
10#include <llvm/Support/CommandLine.h>              // for OptionCategory
11#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
12#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
13#include <llvm/IR/LegacyPassManager.h>             // for PassManager
14#include <llvm/IR/IRPrintingPasses.h>
15#include <llvm/InitializePasses.h>                 // for initializeCodeGen
16#ifndef NDEBUG
17#include <llvm/IR/Verifier.h>
18#endif
19#include <llvm/PassRegistry.h>                     // for PassRegistry
20#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
21#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
22#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
23#include <llvm/Target/TargetOptions.h>             // for TargetOptions
24#include <llvm/Transforms/Scalar.h>
25#include <llvm/Transforms/Utils/Local.h>
26#include <llvm/IR/Module.h>
27#include <kernels/object_cache.h>
28#include <kernels/pipeline.h>
29#include <kernels/kernel.h>
30#ifdef CUDA_ENABLED
31#include <IR_Gen/llvm2ptx.h>
32#endif
33 
34
35
36using namespace llvm;
37using namespace parabix;
38
39namespace codegen {
40
41static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
42
43static cl::bits<DebugFlags>
44DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
45#ifndef USE_LLVM_3_6
46                        clEnumVal(ShowASM, "Print assembly code."),
47#endif
48                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
49                        clEnumValEnd), cl::cat(CodeGenOptions));
50
51static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
52#ifndef USE_LLVM_3_6
53static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
54static cl::opt<bool> AsmVerbose("asm-verbose",
55                                cl::desc("Add comments to directives."),
56                                cl::init(true), cl::cat(CodeGenOptions));
57#endif
58
59char OptLevel;
60static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
61                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
62
63
64static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
65
66static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
67
68
69int BlockSize;
70int SegmentSize;
71int BufferSegments;
72int ThreadNum;
73bool EnableAsserts;
74#ifndef NDEBUG
75#define DEFAULT_TO_TRUE_IN_DEBUG_MODE true
76#else
77#define DEFAULT_TO_TRUE_IN_DEBUG_MODE false
78#endif
79
80static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
81static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
82static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
83static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
84static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(DEFAULT_TO_TRUE_IN_DEBUG_MODE));
85
86const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
87
88bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
89
90static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
91   
92static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
93   
94#ifdef CUDA_ENABLED
95bool NVPTX;
96int GroupNum;
97static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
98static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
99#endif
100
101}
102
103#ifdef CUDA_ENABLED
104void setNVPTXOption(){
105    codegen::NVPTX = codegen::USENVPTX;
106}
107
108void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
109    InitializeAllTargets();
110    InitializeAllTargetMCs();
111    InitializeAllAsmPrinters();
112    InitializeAllAsmParsers();
113
114    PassRegistry *Registry = PassRegistry::getPassRegistry();
115    initializeCore(*Registry);
116    initializeCodeGen(*Registry);
117    initializeLoopStrengthReducePass(*Registry);
118    initializeLowerIntrinsicsPass(*Registry);
119    initializeUnreachableBlockElimPass(*Registry);
120
121    std::error_code error;
122    raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
123    m->print(out, nullptr);
124
125    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
126            m->dump();
127
128    llvm2ptx(IRFilename, PTXFilename);
129}
130#endif
131
132void printParabixVersion () {
133    raw_ostream &OS = outs();
134    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
135}
136
137void AddParabixVersionPrinter() {
138    cl::AddExtraVersionPrinter(&printParabixVersion);
139}
140
141void setAllFeatures(EngineBuilder &builder) {
142    StringMap<bool> HostCPUFeatures;
143    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
144        std::vector<std::string> attrs;
145        for (auto &flag : HostCPUFeatures) {
146            auto enabled = flag.second ? "+" : "-";
147            attrs.push_back(enabled + flag.first().str());
148        }
149        builder.setMAttrs(attrs);
150    }
151}
152
153bool AVX2_available() {
154    StringMap<bool> HostCPUFeatures;
155    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
156        auto f = HostCPUFeatures.find("avx2");
157        return ((f != HostCPUFeatures.end()) && f->second);
158    }
159    return false;
160}
161
162ParabixDriver::ParabixDriver(IDISA::IDISA_Builder * iBuilder)
163: iBuilder(iBuilder)
164, mMainModule(iBuilder->getModule())
165, mTarget(nullptr)
166, mEngine(nullptr)
167, mCache(nullptr)
168{
169    InitializeNativeTarget();
170    InitializeNativeTargetAsmPrinter();
171    InitializeNativeTargetAsmParser();
172
173    PassRegistry * Registry = PassRegistry::getPassRegistry();
174    initializeCore(*Registry);
175    initializeCodeGen(*Registry);
176    initializeLowerIntrinsicsPass(*Registry);
177
178    std::string errMessage;
179    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
180    builder.setErrorStr(&errMessage);
181    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
182    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
183
184    builder.setTargetOptions(opts);
185    builder.setVerifyModules(true);
186    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
187    switch (codegen::OptLevel) {
188        case '0': optLevel = CodeGenOpt::None; break;
189        case '1': optLevel = CodeGenOpt::Less; break;
190        case '2': optLevel = CodeGenOpt::Default; break;
191        case '3': optLevel = CodeGenOpt::Aggressive; break;
192        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
193    }
194    builder.setOptLevel(optLevel);
195
196    setAllFeatures(builder);
197
198    mEngine = builder.create();
199    if (mEngine == nullptr) {
200        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
201    }
202    mTarget = builder.selectTarget();
203    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
204        if (codegen::ObjectCacheDir.empty()) {
205            mCache = new ParabixObjectCache();
206        } else {
207            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
208        }
209        assert (mCache);
210        mEngine->setObjectCache(mCache);
211    }
212}
213
214ExternalFileBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalFileBuffer> b, Value * externalBuf) {
215    ExternalFileBuffer * rawBuf = b.get();
216    mOwnedBuffers.push_back(std::move(b));
217    rawBuf->setStreamSetBuffer(externalBuf);
218    return rawBuf;
219}
220
221StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
222    b->allocateBuffer();
223    mOwnedBuffers.push_back(std::move(b));
224    return mOwnedBuffers.back().get();
225}
226
227
228void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
229    assert (mModuleMap.count(&kb) == 0);
230    mKernelList.push_back(&kb);
231    mModuleMap.emplace(&kb, kb.createKernelStub(inputs, outputs));
232}
233
234void ParabixDriver::generatePipelineIR() {
235    // note: instantiation of all kernels must occur prior to initialization
236    for (const auto & k : mKernelList) {
237        k->addKernelDeclarations(mMainModule);
238    }
239    for (const auto & k : mKernelList) {
240        k->createInstance();
241    }
242    for (const auto & k : mKernelList) {
243        k->initializeInstance();
244    }
245    if (codegen::pipelineParallel) {
246        generateParallelPipeline(iBuilder, mKernelList);
247    } else if (codegen::segmentPipelineParallel) {
248        generateSegmentParallelPipeline(iBuilder, mKernelList);
249    } else {
250        codegen::ThreadNum = 1;
251        generatePipelineLoop(iBuilder, mKernelList);
252    }
253}
254
255void ParabixDriver::addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, FunctionType *type, void * functionPtr) const {
256    const auto f = mModuleMap.find(&kb);
257    assert ("addKernelCall(kb, ...) must be called before addExternalLink(kb, ...)" && f != mModuleMap.end());
258    mEngine->addGlobalMapping(cast<Function>(f->second->getOrInsertFunction(name, type)), functionPtr);
259}
260
261void ParabixDriver::linkAndFinalize() {
262    Module * m = mMainModule;
263    #ifndef NDEBUG
264    try {
265    #endif
266    legacy::PassManager PM;
267    #ifndef NDEBUG
268    PM.add(createVerifierPass());
269    #endif
270    PM.add(createReassociatePass());             //Reassociate expressions.
271    PM.add(createGVNPass());                     //Eliminate common subexpressions.
272    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
273    PM.add(createCFGSimplificationPass());
274
275    raw_fd_ostream * IROutputStream = nullptr;
276    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
277        if (codegen::IROutputFilename.empty()) {
278            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
279        } else {
280            std::error_code error;
281            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
282        }
283        PM.add(createPrintModulePass(*IROutputStream));
284    }
285
286    #ifndef USE_LLVM_3_6
287    raw_fd_ostream * ASMOutputStream = nullptr;
288    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
289        if (codegen::ASMOutputFilename.empty()) {
290            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
291        } else {
292            std::error_code error;
293            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
294        }
295        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
296            report_fatal_error("LLVM error: could not add emit assembly pass");
297        }
298    }
299    #endif
300
301    PM.run(*m);
302    for (auto pair : mModuleMap) {
303        kernel::KernelBuilder * const kb = std::get<0>(pair);
304        m = std::get<1>(pair);
305        bool uncachedObject = true;
306        if (mCache) {
307            const std::string moduleID = m->getModuleIdentifier();
308            const std::string signature = kb->generateKernelSignature(moduleID);
309            if (mCache->loadCachedObjectFile(moduleID, signature)) {
310                uncachedObject = false;
311            }
312        }
313        if (uncachedObject) {
314            Module * const cm = iBuilder->getModule();
315            iBuilder->setModule(m);
316            kb->generateKernel();
317            PM.run(*m);
318            iBuilder->setModule(cm);
319        }       
320        mEngine->addModule(std::unique_ptr<Module>(m));
321    }   
322    mEngine->finalizeObject();
323
324    delete IROutputStream;
325    #ifndef USE_LLVM_3_6
326    delete ASMOutputStream;
327    #endif
328    #ifndef NDEBUG
329    } catch (...) { m->dump(); throw; }
330    #endif
331    mModuleMap.clear();
332}
333
334void * ParabixDriver::getPointerToMain() {
335    return mEngine->getPointerToNamedFunction("Main");
336}
337
338ParabixDriver::~ParabixDriver() {
339    delete mCache;
340}
Note: See TracBrowser for help on using the repository browser.