source: icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp @ 5429

Last change on this file since 5429 was 5429, checked in by nmedfort, 2 years ago

Refactored source kernels. icgrep from stdin should now be able to handle any file size.

File size: 14.3 KB
RevLine 
/*
 *  Copyright (c) 2016 International Characters.
 *  This software is licensed to the public under the Open Software License 3.0.
 *  icgrep is a trademark of International Characters.
 */
6
[5267]7#include "toolchain.h"
[5425]8#include <IR_Gen/idisa_target.h>
[5267]9#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
10#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
11#include <llvm/Support/CommandLine.h>              // for OptionCategory
12#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
13#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
14#include <llvm/IR/LegacyPassManager.h>             // for PassManager
[5401]15#include <llvm/IR/IRPrintingPasses.h>
[5402]16#include <llvm/InitializePasses.h>                 // for initializeCodeGen
17#ifndef NDEBUG
[5351]18#include <llvm/IR/Verifier.h>
[5411]19#include <boost/container/flat_set.hpp>
[5402]20#endif
[5267]21#include <llvm/PassRegistry.h>                     // for PassRegistry
22#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
23#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
24#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
25#include <llvm/Target/TargetOptions.h>             // for TargetOptions
[5350]26#include <llvm/Transforms/Scalar.h>
27#include <llvm/Transforms/Utils/Local.h>
[5398]28#include <llvm/IR/Module.h>
[5425]29#include <toolchain/object_cache.h>
30#include <toolchain/pipeline.h>
[5391]31#include <kernels/kernel.h>
[5418]32#include <sys/stat.h>
[5398]33
[5033]34using namespace llvm;
[5409]35using namespace parabix;
[4939]36
[5033]37namespace codegen {
[4939]38
[5033]39static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
[5347]40
[5295]41static cl::bits<DebugFlags>
42DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
[5401]43#ifndef USE_LLVM_3_6
[5295]44                        clEnumVal(ShowASM, "Print assembly code."),
45#endif
46                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
47                        clEnumValEnd), cl::cat(CodeGenOptions));
[4939]48
[5033]49static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
[5401]50#ifndef USE_LLVM_3_6
[5156]51static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
[5110]52static cl::opt<bool> AsmVerbose("asm-verbose",
53                                cl::desc("Add comments to directives."),
54                                cl::init(true), cl::cat(CodeGenOptions));
[5295]55#endif
[4939]56
[5033]57char OptLevel;
58static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
59                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
[4801]60
[4959]61
[5401]62static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
[4959]63
[5033]64static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
[4959]65
[4801]66
[5033]67int BlockSize;
68int SegmentSize;
[5135]69int BufferSegments;
[5165]70int ThreadNum;
[5347]71bool EnableAsserts;
[5424]72bool EnableCycleCounter;
[5353]73#ifndef NDEBUG
[5425]74#define IN_DEBUG_MODE true
[5353]75#else
[5425]76#define IN_DEBUG_MODE false
[5353]77#endif
[4962]78
[5033]79static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
80static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
[5135]81static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
[5165]82static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
[5425]83static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(IN_DEBUG_MODE));
[5424]84static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::desc("Count and report CPU cycles per kernel"), cl::init(false), cl::cat(CodeGenOptions));
[4962]85
[5033]86const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
[4962]87
[5295]88bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
89
[5364]90static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
[5295]91   
[5364]92static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
93   
[5151]94#ifdef CUDA_ENABLED
95bool NVPTX;
[5314]96int GroupNum;
[5151]97static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
[5314]98static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
[5151]99#endif
100
[5033]101}
[4801]102
[5151]103#ifdef CUDA_ENABLED
104void setNVPTXOption(){
105    codegen::NVPTX = codegen::USENVPTX;
106}
107
108void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
109    InitializeAllTargets();
110    InitializeAllTargetMCs();
111    InitializeAllAsmPrinters();
112    InitializeAllAsmParsers();
113
114    PassRegistry *Registry = PassRegistry::getPassRegistry();
115    initializeCore(*Registry);
116    initializeCodeGen(*Registry);
117    initializeLoopStrengthReducePass(*Registry);
118    initializeLowerIntrinsicsPass(*Registry);
119    initializeUnreachableBlockElimPass(*Registry);
120
121    std::error_code error;
[5350]122    raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
[5151]123    m->print(out, nullptr);
124
[5295]125    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
[5151]126            m->dump();
127
128    llvm2ptx(IRFilename, PTXFilename);
129}
130#endif
131
[5373]132void printParabixVersion () {
133    raw_ostream &OS = outs();
134    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
135}
[5151]136
[5373]137void AddParabixVersionPrinter() {
138    cl::AddExtraVersionPrinter(&printParabixVersion);
139}
140
[5067]141void setAllFeatures(EngineBuilder &builder) {
[5350]142    StringMap<bool> HostCPUFeatures;
143    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
[5067]144        std::vector<std::string> attrs;
145        for (auto &flag : HostCPUFeatures) {
146            auto enabled = flag.second ? "+" : "-";
147            attrs.push_back(enabled + flag.first().str());
148        }
149        builder.setMAttrs(attrs);
150    }
151}
152
[5108]153bool AVX2_available() {
[5350]154    StringMap<bool> HostCPUFeatures;
155    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
[5108]156        auto f = HostCPUFeatures.find("avx2");
157        return ((f != HostCPUFeatures.end()) && f->second);
158    }
159    return false;
160}
161
[5425]162ParabixDriver::ParabixDriver(std::string && moduleName)
163: mContext(new llvm::LLVMContext())
164, mMainModule(new Module(moduleName, *mContext))
165, iBuilder(nullptr)
[5398]166, mTarget(nullptr)
167, mEngine(nullptr)
[5425]168, mCache(nullptr) {
169
[5391]170    InitializeNativeTarget();
171    InitializeNativeTargetAsmPrinter();
172    InitializeNativeTargetAsmParser();
173
174    PassRegistry * Registry = PassRegistry::getPassRegistry();
175    initializeCore(*Registry);
176    initializeCodeGen(*Registry);
177    initializeLowerIntrinsicsPass(*Registry);
178
179    std::string errMessage;
180    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
181    builder.setErrorStr(&errMessage);
182    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
183    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
184
185    builder.setTargetOptions(opts);
[5425]186    builder.setVerifyModules(IN_DEBUG_MODE);
[5391]187    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
188    switch (codegen::OptLevel) {
189        case '0': optLevel = CodeGenOpt::None; break;
190        case '1': optLevel = CodeGenOpt::Less; break;
191        case '2': optLevel = CodeGenOpt::Default; break;
192        case '3': optLevel = CodeGenOpt::Aggressive; break;
193        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
194    }
195    builder.setOptLevel(optLevel);
196
197    setAllFeatures(builder);
[5398]198    mEngine = builder.create();
199    if (mEngine == nullptr) {
200        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
201    }
202    mTarget = builder.selectTarget();
[5401]203    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
[5398]204        if (codegen::ObjectCacheDir.empty()) {
[5402]205            mCache = new ParabixObjectCache();
[5398]206        } else {
[5402]207            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
[5398]208        }
[5399]209        assert (mCache);
[5402]210        mEngine->setObjectCache(mCache);
[5398]211    }
[5425]212
213    mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple());
214
215    iBuilder.reset(IDISA::GetIDISA_Builder(mMainModule));
216    iBuilder->setDriver(this);
[5398]217}
218
[5429]219ExternalBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalBuffer> b) {
220    mOwnedBuffers.emplace_back(std::move(b));
221    return cast<ExternalBuffer>(mOwnedBuffers.back().get());
[5409]222}
223
224StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
225    b->allocateBuffer();
[5429]226    mOwnedBuffers.emplace_back(std::move(b));
[5409]227    return mOwnedBuffers.back().get();
228}
229
[5414]230kernel::KernelBuilder * ParabixDriver::addKernelInstance(std::unique_ptr<kernel::KernelBuilder> kb) {
[5429]231    mOwnedKernels.emplace_back(std::move(kb));
[5414]232    return mOwnedKernels.back().get();
233}
[5409]234
[5391]235void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
[5418]236    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
[5429]237    mPipeline.emplace_back(&kb);
[5418]238    kb.createKernelStub(inputs, outputs);
[5391]239}
240
[5414]241void ParabixDriver::makeKernelCall(kernel::KernelBuilder * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
[5418]242    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
[5429]243    mPipeline.emplace_back(kb);
[5418]244    kb->createKernelStub(inputs, outputs);
[5414]245}
246
[5391]247void ParabixDriver::generatePipelineIR() {
[5411]248    #ifndef NDEBUG
[5414]249    if (LLVM_UNLIKELY(mPipeline.empty())) {
[5411]250        report_fatal_error("Pipeline must contain at least one kernel");
251    } else {
[5414]252        boost::container::flat_set<kernel::KernelBuilder *> K(mPipeline.begin(), mPipeline.end());
253        if (LLVM_UNLIKELY(K.size() != mPipeline.size())) {
[5411]254            report_fatal_error("Kernel definitions can only occur once in the pipeline");
255        }
256    }
257    #endif
[5425]258
[5408]259    // note: instantiation of all kernels must occur prior to initialization
[5414]260    for (const auto & k : mPipeline) {
[5408]261        k->addKernelDeclarations(mMainModule);
[5391]262    }
[5429]263
[5414]264    for (const auto & k : mPipeline) {
[5408]265        k->createInstance();
266    }
[5429]267
[5414]268    for (const auto & k : mPipeline) {
[5408]269        k->initializeInstance();
270    }
[5429]271
[5391]272    if (codegen::pipelineParallel) {
[5414]273        generateParallelPipeline(iBuilder, mPipeline);
[5391]274    } else if (codegen::segmentPipelineParallel) {
[5414]275        generateSegmentParallelPipeline(iBuilder, mPipeline);
[5391]276    } else {
277        codegen::ThreadNum = 1;
[5414]278        generatePipelineLoop(iBuilder, mPipeline);
[5391]279    }
[5429]280
[5414]281    for (const auto & k : mPipeline) {
[5418]282        k->finalizeInstance();
[5411]283    }
[5391]284}
285
[5425]286Function * ParabixDriver::LinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
287    assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr));
288    Function * f = cast<Function>(mod->getOrInsertFunction(name, type));
289    mEngine->addGlobalMapping(f, functionPtr);
290    return f;
[5398]291}
292
[5391]293void ParabixDriver::linkAndFinalize() {
[5401]294    Module * m = mMainModule;
295    #ifndef NDEBUG
296    try {
297    #endif
298    legacy::PassManager PM;
299    #ifndef NDEBUG
300    PM.add(createVerifierPass());
301    #endif
[5422]302    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
[5401]303    PM.add(createReassociatePass());             //Reassociate expressions.
304    PM.add(createGVNPass());                     //Eliminate common subexpressions.
305    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
306    PM.add(createCFGSimplificationPass());
307
308    raw_fd_ostream * IROutputStream = nullptr;
309    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
310        if (codegen::IROutputFilename.empty()) {
311            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
312        } else {
313            std::error_code error;
314            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
[5398]315        }
[5401]316        PM.add(createPrintModulePass(*IROutputStream));
317    }
318
319    #ifndef USE_LLVM_3_6
320    raw_fd_ostream * ASMOutputStream = nullptr;
321    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
322        if (codegen::ASMOutputFilename.empty()) {
323            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
324        } else {
325            std::error_code error;
326            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
327        }
328        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
329            report_fatal_error("LLVM error: could not add emit assembly pass");
330        }
331    }
332    #endif
333
334    PM.run(*m);
[5418]335    for (kernel::KernelBuilder * const kb : mPipeline) {
336        m = kb->getModule();
[5399]337        bool uncachedObject = true;
338        if (mCache) {
[5401]339            const std::string moduleID = m->getModuleIdentifier();
340            const std::string signature = kb->generateKernelSignature(moduleID);
[5399]341            if (mCache->loadCachedObjectFile(moduleID, signature)) {
342                uncachedObject = false;
343            }
[5394]344        }
[5399]345        if (uncachedObject) {
[5401]346            Module * const cm = iBuilder->getModule();
347            iBuilder->setModule(m);
[5391]348            kb->generateKernel();
[5401]349            PM.run(*m);
350            iBuilder->setModule(cm);
[5399]351        }       
[5401]352        mEngine->addModule(std::unique_ptr<Module>(m));
353    }   
[5391]354    mEngine->finalizeObject();
[5401]355
356    delete IROutputStream;
357    #ifndef USE_LLVM_3_6
358    delete ASMOutputStream;
359    #endif
360    #ifndef NDEBUG
361    } catch (...) { m->dump(); throw; }
362    #endif
[5391]363}
364
365void * ParabixDriver::getPointerToMain() {
366    return mEngine->getPointerToNamedFunction("Main");
367}
[5402]368
369ParabixDriver::~ParabixDriver() {
370    delete mCache;
371}
Note: See TracBrowser for help on using the repository browser.