source: icGREP/icgrep-devel/icgrep/kernels/toolchain.cpp @ 5408

Last change on this file since 5408 was 5408, checked in by nmedfort, 2 years ago

First attempt to allow Kernels to wait for consumers to finish processing before performing a realloc.

File size: 12.4 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
9#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
10#include <llvm/Support/CommandLine.h>              // for OptionCategory
11#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
12#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
13#include <llvm/IR/LegacyPassManager.h>             // for PassManager
14#include <llvm/IR/IRPrintingPasses.h>
15#include <llvm/InitializePasses.h>                 // for initializeCodeGen
16#ifndef NDEBUG
17#include <llvm/IR/Verifier.h>
18#endif
19#include <llvm/PassRegistry.h>                     // for PassRegistry
20#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
21#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
22#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
23#include <llvm/Target/TargetOptions.h>             // for TargetOptions
24#include <llvm/Transforms/Scalar.h>
25#include <llvm/Transforms/Utils/Local.h>
26#include <llvm/IR/Module.h>
27#include <kernels/object_cache.h>
28#include <kernels/pipeline.h>
29#include <kernels/kernel.h>
30#ifdef CUDA_ENABLED
31#include <IR_Gen/llvm2ptx.h>
32#endif
33 
34
35
36using namespace llvm;
37
38namespace codegen {
39
40static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
41
42static cl::bits<DebugFlags>
43DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
44#ifndef USE_LLVM_3_6
45                        clEnumVal(ShowASM, "Print assembly code."),
46#endif
47                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
48                        clEnumValEnd), cl::cat(CodeGenOptions));
49
50static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
51#ifndef USE_LLVM_3_6
52static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
53static cl::opt<bool> AsmVerbose("asm-verbose",
54                                cl::desc("Add comments to directives."),
55                                cl::init(true), cl::cat(CodeGenOptions));
56#endif
57
58char OptLevel;
59static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
60                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
61
62
63static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
64
65static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
66
67
68int BlockSize;
69int SegmentSize;
70int BufferSegments;
71int ThreadNum;
72bool EnableAsserts;
73#ifndef NDEBUG
74#define DEFAULT_TO_TRUE_IN_DEBUG_MODE true
75#else
76#define DEFAULT_TO_TRUE_IN_DEBUG_MODE false
77#endif
78
79static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
80static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
81static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
82static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
83static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(DEFAULT_TO_TRUE_IN_DEBUG_MODE));
84
85const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
86
87bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
88
89static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
90   
91static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
92   
93#ifdef CUDA_ENABLED
94bool NVPTX;
95int GroupNum;
96static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
97static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
98#endif
99
100}
101
102#ifdef CUDA_ENABLED
103void setNVPTXOption(){
104    codegen::NVPTX = codegen::USENVPTX;
105}
106
107void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
108    InitializeAllTargets();
109    InitializeAllTargetMCs();
110    InitializeAllAsmPrinters();
111    InitializeAllAsmParsers();
112
113    PassRegistry *Registry = PassRegistry::getPassRegistry();
114    initializeCore(*Registry);
115    initializeCodeGen(*Registry);
116    initializeLoopStrengthReducePass(*Registry);
117    initializeLowerIntrinsicsPass(*Registry);
118    initializeUnreachableBlockElimPass(*Registry);
119
120    std::error_code error;
121    raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
122    m->print(out, nullptr);
123
124    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
125            m->dump();
126
127    llvm2ptx(IRFilename, PTXFilename);
128}
129#endif
130
131void printParabixVersion () {
132    raw_ostream &OS = outs();
133    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
134}
135
136void AddParabixVersionPrinter() {
137    cl::AddExtraVersionPrinter(&printParabixVersion);
138}
139
140void setAllFeatures(EngineBuilder &builder) {
141    StringMap<bool> HostCPUFeatures;
142    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
143        std::vector<std::string> attrs;
144        for (auto &flag : HostCPUFeatures) {
145            auto enabled = flag.second ? "+" : "-";
146            attrs.push_back(enabled + flag.first().str());
147        }
148        builder.setMAttrs(attrs);
149    }
150}
151
152bool AVX2_available() {
153    StringMap<bool> HostCPUFeatures;
154    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
155        auto f = HostCPUFeatures.find("avx2");
156        return ((f != HostCPUFeatures.end()) && f->second);
157    }
158    return false;
159}
160
161ParabixDriver::ParabixDriver(IDISA::IDISA_Builder * iBuilder)
162: iBuilder(iBuilder)
163, mMainModule(iBuilder->getModule())
164, mTarget(nullptr)
165, mEngine(nullptr)
166, mCache(nullptr)
167{
168    InitializeNativeTarget();
169    InitializeNativeTargetAsmPrinter();
170    InitializeNativeTargetAsmParser();
171
172    PassRegistry * Registry = PassRegistry::getPassRegistry();
173    initializeCore(*Registry);
174    initializeCodeGen(*Registry);
175    initializeLowerIntrinsicsPass(*Registry);
176
177    std::string errMessage;
178    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
179    builder.setErrorStr(&errMessage);
180    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
181    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
182
183    builder.setTargetOptions(opts);
184    builder.setVerifyModules(true);
185    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
186    switch (codegen::OptLevel) {
187        case '0': optLevel = CodeGenOpt::None; break;
188        case '1': optLevel = CodeGenOpt::Less; break;
189        case '2': optLevel = CodeGenOpt::Default; break;
190        case '3': optLevel = CodeGenOpt::Aggressive; break;
191        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
192    }
193    builder.setOptLevel(optLevel);
194
195    setAllFeatures(builder);
196
197    mEngine = builder.create();
198    if (mEngine == nullptr) {
199        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
200    }
201    mTarget = builder.selectTarget();
202    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
203        if (codegen::ObjectCacheDir.empty()) {
204            mCache = new ParabixObjectCache();
205        } else {
206            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
207        }
208        assert (mCache);
209        mEngine->setObjectCache(mCache);
210    }
211}
212
213void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
214    assert (mModuleMap.count(&kb) == 0);
215    mKernelList.push_back(&kb);
216    mModuleMap.emplace(&kb, kb.createKernelStub(inputs, outputs));
217}
218
219void ParabixDriver::generatePipelineIR() {
220    // note: instantiation of all kernels must occur prior to initialization
221    for (const auto & k : mKernelList) {
222        k->addKernelDeclarations(mMainModule);
223    }
224    for (const auto & k : mKernelList) {
225        k->createInstance();
226    }
227    for (const auto & k : mKernelList) {
228        k->initializeInstance();
229    }
230    if (codegen::pipelineParallel) {
231        generateParallelPipeline(iBuilder, mKernelList);
232    } else if (codegen::segmentPipelineParallel) {
233        generateSegmentParallelPipeline(iBuilder, mKernelList);
234    } else {
235        codegen::ThreadNum = 1;
236        generatePipelineLoop(iBuilder, mKernelList);
237    }
238}
239
240void ParabixDriver::addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, FunctionType *type, void * functionPtr) const {
241    const auto f = mModuleMap.find(&kb);
242    assert ("addKernelCall(kb, ...) must be called before addExternalLink(kb, ...)" && f != mModuleMap.end());
243    mEngine->addGlobalMapping(cast<Function>(f->second->getOrInsertFunction(name, type)), functionPtr);
244}
245
246void ParabixDriver::linkAndFinalize() {
247    Module * m = mMainModule;
248    #ifndef NDEBUG
249    try {
250    #endif
251    legacy::PassManager PM;
252    #ifndef NDEBUG
253    PM.add(createVerifierPass());
254    #endif
255    PM.add(createReassociatePass());             //Reassociate expressions.
256    PM.add(createGVNPass());                     //Eliminate common subexpressions.
257    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
258    PM.add(createCFGSimplificationPass());
259
260    raw_fd_ostream * IROutputStream = nullptr;
261    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
262        if (codegen::IROutputFilename.empty()) {
263            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
264        } else {
265            std::error_code error;
266            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
267        }
268        PM.add(createPrintModulePass(*IROutputStream));
269    }
270
271    #ifndef USE_LLVM_3_6
272    raw_fd_ostream * ASMOutputStream = nullptr;
273    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
274        if (codegen::ASMOutputFilename.empty()) {
275            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
276        } else {
277            std::error_code error;
278            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
279        }
280        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
281            report_fatal_error("LLVM error: could not add emit assembly pass");
282        }
283    }
284    #endif
285
286    PM.run(*m);
287    for (auto pair : mModuleMap) {
288        kernel::KernelBuilder * const kb = std::get<0>(pair);
289        m = std::get<1>(pair);
290        bool uncachedObject = true;
291        if (mCache) {
292            const std::string moduleID = m->getModuleIdentifier();
293            const std::string signature = kb->generateKernelSignature(moduleID);
294            if (mCache->loadCachedObjectFile(moduleID, signature)) {
295                uncachedObject = false;
296            }
297        }
298        if (uncachedObject) {
299            Module * const cm = iBuilder->getModule();
300            iBuilder->setModule(m);
301            kb->generateKernel();
302            PM.run(*m);
303            iBuilder->setModule(cm);
304        }       
305        mEngine->addModule(std::unique_ptr<Module>(m));
306    }   
307    mEngine->finalizeObject();
308
309    delete IROutputStream;
310    #ifndef USE_LLVM_3_6
311    delete ASMOutputStream;
312    #endif
313    #ifndef NDEBUG
314    } catch (...) { m->dump(); throw; }
315    #endif
316    mModuleMap.clear();
317}
318
319void * ParabixDriver::getPointerToMain() {
320    return mEngine->getPointerToNamedFunction("Main");
321}
322
323ParabixDriver::~ParabixDriver() {
324    delete mCache;
325}
Note: See TracBrowser for help on using the repository browser.