source: icGREP/icgrep-devel/icgrep/kernels/toolchain.cpp @ 5424

Last change on this file since 5424 was 5424, checked in by cameron, 2 years ago

ShowKernelCycles option

File size: 14.5 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
9#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
10#include <llvm/Support/CommandLine.h>              // for OptionCategory
11#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
12#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
13#include <llvm/IR/LegacyPassManager.h>             // for PassManager
14#include <llvm/IR/IRPrintingPasses.h>
15#include <llvm/InitializePasses.h>                 // for initializeCodeGen
16#ifndef NDEBUG
17#include <llvm/IR/Verifier.h>
18#include <boost/container/flat_set.hpp>
19#endif
20#include <llvm/PassRegistry.h>                     // for PassRegistry
21#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
22#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
23#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
24#include <llvm/Target/TargetOptions.h>             // for TargetOptions
25#include <llvm/Transforms/Scalar.h>
26#include <llvm/Transforms/Utils/Local.h>
27#include <llvm/IR/Module.h>
28#include <kernels/object_cache.h>
29#include <kernels/pipeline.h>
30#include <kernels/kernel.h>
31#include <sys/stat.h>
32
33using namespace llvm;
34using namespace parabix;
35
36namespace codegen {
37
38static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
39
40static cl::bits<DebugFlags>
41DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
42#ifndef USE_LLVM_3_6
43                        clEnumVal(ShowASM, "Print assembly code."),
44#endif
45                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
46                        clEnumValEnd), cl::cat(CodeGenOptions));
47
48static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
49#ifndef USE_LLVM_3_6
50static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
51static cl::opt<bool> AsmVerbose("asm-verbose",
52                                cl::desc("Add comments to directives."),
53                                cl::init(true), cl::cat(CodeGenOptions));
54#endif
55
56char OptLevel;
57static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
58                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
59
60
61static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
62
63static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
64
65
66int BlockSize;
67int SegmentSize;
68int BufferSegments;
69int ThreadNum;
70bool EnableAsserts;
71bool EnableCycleCounter;
72#ifndef NDEBUG
73#define DEFAULT_TO_TRUE_IN_DEBUG_MODE true
74#else
75#define DEFAULT_TO_TRUE_IN_DEBUG_MODE false
76#endif
77
78static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
79static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
80static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
81static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
82static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(DEFAULT_TO_TRUE_IN_DEBUG_MODE));
83static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::desc("Count and report CPU cycles per kernel"), cl::init(false), cl::cat(CodeGenOptions));
84
85const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
86
87bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
88
89static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
90   
91static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
92   
93#ifdef CUDA_ENABLED
94bool NVPTX;
95int GroupNum;
96static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
97static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
98#endif
99
100}
101
102#ifdef CUDA_ENABLED
103void setNVPTXOption(){
104    codegen::NVPTX = codegen::USENVPTX;
105}
106
107void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
108    InitializeAllTargets();
109    InitializeAllTargetMCs();
110    InitializeAllAsmPrinters();
111    InitializeAllAsmParsers();
112
113    PassRegistry *Registry = PassRegistry::getPassRegistry();
114    initializeCore(*Registry);
115    initializeCodeGen(*Registry);
116    initializeLoopStrengthReducePass(*Registry);
117    initializeLowerIntrinsicsPass(*Registry);
118    initializeUnreachableBlockElimPass(*Registry);
119
120    std::error_code error;
121    raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
122    m->print(out, nullptr);
123
124    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
125            m->dump();
126
127    llvm2ptx(IRFilename, PTXFilename);
128}
129#endif
130
131void printParabixVersion () {
132    raw_ostream &OS = outs();
133    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
134}
135
136void AddParabixVersionPrinter() {
137    cl::AddExtraVersionPrinter(&printParabixVersion);
138}
139
140void setAllFeatures(EngineBuilder &builder) {
141    StringMap<bool> HostCPUFeatures;
142    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
143        std::vector<std::string> attrs;
144        for (auto &flag : HostCPUFeatures) {
145            auto enabled = flag.second ? "+" : "-";
146            attrs.push_back(enabled + flag.first().str());
147        }
148        builder.setMAttrs(attrs);
149    }
150}
151
152bool AVX2_available() {
153    StringMap<bool> HostCPUFeatures;
154    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
155        auto f = HostCPUFeatures.find("avx2");
156        return ((f != HostCPUFeatures.end()) && f->second);
157    }
158    return false;
159}
160
161ParabixDriver::ParabixDriver(IDISA::IDISA_Builder * iBuilder)
162: iBuilder(iBuilder)
163, mMainModule(iBuilder->getModule())
164, mTarget(nullptr)
165, mEngine(nullptr)
166, mCache(nullptr)
167{
168    InitializeNativeTarget();
169    InitializeNativeTargetAsmPrinter();
170    InitializeNativeTargetAsmParser();
171
172    PassRegistry * Registry = PassRegistry::getPassRegistry();
173    initializeCore(*Registry);
174    initializeCodeGen(*Registry);
175    initializeLowerIntrinsicsPass(*Registry);
176
177    std::string errMessage;
178    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
179    builder.setErrorStr(&errMessage);
180    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
181    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
182
183    builder.setTargetOptions(opts);
184    builder.setVerifyModules(true);
185    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
186    switch (codegen::OptLevel) {
187        case '0': optLevel = CodeGenOpt::None; break;
188        case '1': optLevel = CodeGenOpt::Less; break;
189        case '2': optLevel = CodeGenOpt::Default; break;
190        case '3': optLevel = CodeGenOpt::Aggressive; break;
191        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
192    }
193    builder.setOptLevel(optLevel);
194
195    setAllFeatures(builder);
196
197    mEngine = builder.create();
198    if (mEngine == nullptr) {
199        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
200    }
201    mTarget = builder.selectTarget();
202    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
203        if (codegen::ObjectCacheDir.empty()) {
204            mCache = new ParabixObjectCache();
205        } else {
206            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
207        }
208        assert (mCache);
209        mEngine->setObjectCache(mCache);
210    }
211}
212
213ExternalFileBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalFileBuffer> b, Value * externalBuf) {
214    ExternalFileBuffer * rawBuf = b.get();
215    mOwnedBuffers.push_back(std::move(b));
216    rawBuf->setStreamSetBuffer(externalBuf);
217    return rawBuf;
218}
219
220StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
221    b->allocateBuffer();
222    mOwnedBuffers.push_back(std::move(b));
223    return mOwnedBuffers.back().get();
224}
225
226kernel::KernelBuilder * ParabixDriver::addKernelInstance(std::unique_ptr<kernel::KernelBuilder> kb) {
227    mOwnedKernels.push_back(std::move(kb));
228    return mOwnedKernels.back().get();
229}
230
231void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
232    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
233    mPipeline.push_back(&kb);
234    kb.createKernelStub(inputs, outputs);
235}
236
237void ParabixDriver::makeKernelCall(kernel::KernelBuilder * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
238    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
239    mPipeline.push_back(kb);
240    kb->createKernelStub(inputs, outputs);
241}
242
243void ParabixDriver::generatePipelineIR() {
244    #ifndef NDEBUG
245    if (LLVM_UNLIKELY(mPipeline.empty())) {
246        report_fatal_error("Pipeline must contain at least one kernel");
247    } else {
248        boost::container::flat_set<kernel::KernelBuilder *> K(mPipeline.begin(), mPipeline.end());
249        if (LLVM_UNLIKELY(K.size() != mPipeline.size())) {
250            report_fatal_error("Kernel definitions can only occur once in the pipeline");
251        }
252    }
253    #endif
254    // note: instantiation of all kernels must occur prior to initialization
255    for (const auto & k : mPipeline) {
256        k->addKernelDeclarations(mMainModule);
257    }
258    for (const auto & k : mPipeline) {
259        k->createInstance();
260    }
261    for (const auto & k : mPipeline) {
262        k->initializeInstance();
263    }
264    if (codegen::pipelineParallel) {
265        generateParallelPipeline(iBuilder, mPipeline);
266    } else if (codegen::segmentPipelineParallel) {
267        generateSegmentParallelPipeline(iBuilder, mPipeline);
268    } else {
269        codegen::ThreadNum = 1;
270        generatePipelineLoop(iBuilder, mPipeline);
271    }
272    for (const auto & k : mPipeline) {
273        k->finalizeInstance();
274    }
275}
276
277void ParabixDriver::addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
278    assert ("addKernelCall or makeKernelCall must be called before addExternalLink" && (kb.getModule() != nullptr));
279    mEngine->addGlobalMapping(cast<Function>(kb.getModule()->getOrInsertFunction(name, type)), functionPtr);
280}
281
// Returns the size in bytes that fstat reports for the open file descriptor
// fd, or 0 when fstat fails.
uint64_t file_size(const uint32_t fd) {
    struct stat info;
    const bool ok = (fstat(fd, &info) == 0);
    if (!ok) {
        return 0;
    }
    return info.st_size;
}
289
290void ParabixDriver::linkAndFinalize() {
291    Module * m = mMainModule;
292    #ifndef NDEBUG
293    try {
294    #endif
295    legacy::PassManager PM;
296    #ifndef NDEBUG
297    PM.add(createVerifierPass());
298    #endif
299    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
300    PM.add(createReassociatePass());             //Reassociate expressions.
301    PM.add(createGVNPass());                     //Eliminate common subexpressions.
302    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
303    PM.add(createCFGSimplificationPass());
304
305    raw_fd_ostream * IROutputStream = nullptr;
306    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
307        if (codegen::IROutputFilename.empty()) {
308            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
309        } else {
310            std::error_code error;
311            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
312        }
313        PM.add(createPrintModulePass(*IROutputStream));
314    }
315
316    #ifndef USE_LLVM_3_6
317    raw_fd_ostream * ASMOutputStream = nullptr;
318    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
319        if (codegen::ASMOutputFilename.empty()) {
320            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
321        } else {
322            std::error_code error;
323            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
324        }
325        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
326            report_fatal_error("LLVM error: could not add emit assembly pass");
327        }
328    }
329    #endif
330
331    FunctionType * fileSizeType = FunctionType::get(iBuilder->getInt64Ty(), { iBuilder->getInt32Ty() });
332    mEngine->addGlobalMapping(cast<Function>(mMainModule->getOrInsertFunction("file_size", fileSizeType)), (void *)&file_size);
333
334    PM.run(*m);
335    for (kernel::KernelBuilder * const kb : mPipeline) {
336        m = kb->getModule();
337        bool uncachedObject = true;
338        if (mCache) {
339            const std::string moduleID = m->getModuleIdentifier();
340            const std::string signature = kb->generateKernelSignature(moduleID);
341            if (mCache->loadCachedObjectFile(moduleID, signature)) {
342                uncachedObject = false;
343            }
344        }
345        if (uncachedObject) {
346            Module * const cm = iBuilder->getModule();
347            iBuilder->setModule(m);
348            kb->generateKernel();
349            PM.run(*m);
350            iBuilder->setModule(cm);
351        }       
352        mEngine->addModule(std::unique_ptr<Module>(m));
353    }   
354    mEngine->finalizeObject();
355
356    delete IROutputStream;
357    #ifndef USE_LLVM_3_6
358    delete ASMOutputStream;
359    #endif
360    #ifndef NDEBUG
361    } catch (...) { m->dump(); throw; }
362    #endif
363}
364
365void * ParabixDriver::getPointerToMain() {
366    return mEngine->getPointerToNamedFunction("Main");
367}
368
369ParabixDriver::~ParabixDriver() {
370    delete mCache;
371}
Note: See TracBrowser for help on using the repository browser.