source: icGREP/icgrep-devel/icgrep/kernels/toolchain.cpp @ 5418

Last change on this file since 5418 was 5418, checked in by nmedfort, 2 years ago

Removed non-functional CUDA code from icgrep and consolidated grep and multigrep mode into a single function; allowed segment parallel pipeline to utilize process as its initial thread; modified MMapSourceKernel to map and perform mmap directly and advise the OS to drop consumed data streams.

File size: 14.2 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "toolchain.h"
8#include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
9#include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
10#include <llvm/Support/CommandLine.h>              // for OptionCategory
11#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
12#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
13#include <llvm/IR/LegacyPassManager.h>             // for PassManager
14#include <llvm/IR/IRPrintingPasses.h>
15#include <llvm/InitializePasses.h>                 // for initializeCodeGen
16#ifndef NDEBUG
17#include <llvm/IR/Verifier.h>
18#include <boost/container/flat_set.hpp>
19#endif
20#include <llvm/PassRegistry.h>                     // for PassRegistry
21#include <llvm/Support/CodeGen.h>                  // for Level, Level::None
22#include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
23#include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
24#include <llvm/Target/TargetOptions.h>             // for TargetOptions
25#include <llvm/Transforms/Scalar.h>
26#include <llvm/Transforms/Utils/Local.h>
27#include <llvm/IR/Module.h>
28#include <kernels/object_cache.h>
29#include <kernels/pipeline.h>
30#include <kernels/kernel.h>
31#include <sys/stat.h>
32
33using namespace llvm;
34using namespace parabix;
35
36namespace codegen {
37
38static cl::OptionCategory CodeGenOptions("Code Generation Options", "These options control code generation.");
39
40static cl::bits<DebugFlags>
41DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
42#ifndef USE_LLVM_3_6
43                        clEnumVal(ShowASM, "Print assembly code."),
44#endif
45                        clEnumVal(SerializeThreads, "Force segment threads to run sequentially."),
46                        clEnumValEnd), cl::cat(CodeGenOptions));
47
48static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
49#ifndef USE_LLVM_3_6
50static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
51static cl::opt<bool> AsmVerbose("asm-verbose",
52                                cl::desc("Add comments to directives."),
53                                cl::init(true), cl::cat(CodeGenOptions));
54#endif
55
56char OptLevel;
57static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
58                              cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
59
60
61static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
62
63static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
64
65
66int BlockSize;
67int SegmentSize;
68int BufferSegments;
69int ThreadNum;
70bool EnableAsserts;
71#ifndef NDEBUG
72#define DEFAULT_TO_TRUE_IN_DEBUG_MODE true
73#else
74#define DEFAULT_TO_TRUE_IN_DEBUG_MODE false
75#endif
76
77static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
78static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
79static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
80static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
81static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(DEFAULT_TO_TRUE_IN_DEBUG_MODE));
82
83const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
84
85bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
86
87static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
88   
89static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
90   
91#ifdef CUDA_ENABLED
92bool NVPTX;
93int GroupNum;
94static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
95static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
96#endif
97
98}
99
100#ifdef CUDA_ENABLED
101void setNVPTXOption(){
102    codegen::NVPTX = codegen::USENVPTX;
103}
104
105void Compile2PTX (Module * m, std::string IRFilename, std::string PTXFilename) {
106    InitializeAllTargets();
107    InitializeAllTargetMCs();
108    InitializeAllAsmPrinters();
109    InitializeAllAsmParsers();
110
111    PassRegistry *Registry = PassRegistry::getPassRegistry();
112    initializeCore(*Registry);
113    initializeCodeGen(*Registry);
114    initializeLoopStrengthReducePass(*Registry);
115    initializeLowerIntrinsicsPass(*Registry);
116    initializeUnreachableBlockElimPass(*Registry);
117
118    std::error_code error;
119    raw_fd_ostream out(IRFilename, error, sys::fs::OpenFlags::F_None);
120    m->print(out, nullptr);
121
122    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
123            m->dump();
124
125    llvm2ptx(IRFilename, PTXFilename);
126}
127#endif
128
129void printParabixVersion () {
130    raw_ostream &OS = outs();
131    OS << "Parabix (http://parabix.costar.sfu.ca/):\n  " << "Parabix revision " << PARABIX_VERSION << "\n";
132}
133
134void AddParabixVersionPrinter() {
135    cl::AddExtraVersionPrinter(&printParabixVersion);
136}
137
138void setAllFeatures(EngineBuilder &builder) {
139    StringMap<bool> HostCPUFeatures;
140    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
141        std::vector<std::string> attrs;
142        for (auto &flag : HostCPUFeatures) {
143            auto enabled = flag.second ? "+" : "-";
144            attrs.push_back(enabled + flag.first().str());
145        }
146        builder.setMAttrs(attrs);
147    }
148}
149
150bool AVX2_available() {
151    StringMap<bool> HostCPUFeatures;
152    if (sys::getHostCPUFeatures(HostCPUFeatures)) {
153        auto f = HostCPUFeatures.find("avx2");
154        return ((f != HostCPUFeatures.end()) && f->second);
155    }
156    return false;
157}
158
159ParabixDriver::ParabixDriver(IDISA::IDISA_Builder * iBuilder)
160: iBuilder(iBuilder)
161, mMainModule(iBuilder->getModule())
162, mTarget(nullptr)
163, mEngine(nullptr)
164, mCache(nullptr)
165{
166    InitializeNativeTarget();
167    InitializeNativeTargetAsmPrinter();
168    InitializeNativeTargetAsmParser();
169
170    PassRegistry * Registry = PassRegistry::getPassRegistry();
171    initializeCore(*Registry);
172    initializeCodeGen(*Registry);
173    initializeLowerIntrinsicsPass(*Registry);
174
175    std::string errMessage;
176    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
177    builder.setErrorStr(&errMessage);
178    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
179    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
180
181    builder.setTargetOptions(opts);
182    builder.setVerifyModules(true);
183    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
184    switch (codegen::OptLevel) {
185        case '0': optLevel = CodeGenOpt::None; break;
186        case '1': optLevel = CodeGenOpt::Less; break;
187        case '2': optLevel = CodeGenOpt::Default; break;
188        case '3': optLevel = CodeGenOpt::Aggressive; break;
189        default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
190    }
191    builder.setOptLevel(optLevel);
192
193    setAllFeatures(builder);
194
195    mEngine = builder.create();
196    if (mEngine == nullptr) {
197        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
198    }
199    mTarget = builder.selectTarget();
200    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
201        if (codegen::ObjectCacheDir.empty()) {
202            mCache = new ParabixObjectCache();
203        } else {
204            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
205        }
206        assert (mCache);
207        mEngine->setObjectCache(mCache);
208    }
209}
210
211ExternalFileBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalFileBuffer> b, Value * externalBuf) {
212    ExternalFileBuffer * rawBuf = b.get();
213    mOwnedBuffers.push_back(std::move(b));
214    rawBuf->setStreamSetBuffer(externalBuf);
215    return rawBuf;
216}
217
218StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
219    b->allocateBuffer();
220    mOwnedBuffers.push_back(std::move(b));
221    return mOwnedBuffers.back().get();
222}
223
224kernel::KernelBuilder * ParabixDriver::addKernelInstance(std::unique_ptr<kernel::KernelBuilder> kb) {
225    mOwnedKernels.push_back(std::move(kb));
226    return mOwnedKernels.back().get();
227}
228
229void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
230    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
231    mPipeline.push_back(&kb);
232    kb.createKernelStub(inputs, outputs);
233}
234
235void ParabixDriver::makeKernelCall(kernel::KernelBuilder * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
236    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
237    mPipeline.push_back(kb);
238    kb->createKernelStub(inputs, outputs);
239}
240
241void ParabixDriver::generatePipelineIR() {
242    #ifndef NDEBUG
243    if (LLVM_UNLIKELY(mPipeline.empty())) {
244        report_fatal_error("Pipeline must contain at least one kernel");
245    } else {
246        boost::container::flat_set<kernel::KernelBuilder *> K(mPipeline.begin(), mPipeline.end());
247        if (LLVM_UNLIKELY(K.size() != mPipeline.size())) {
248            report_fatal_error("Kernel definitions can only occur once in the pipeline");
249        }
250    }
251    #endif
252    // note: instantiation of all kernels must occur prior to initialization
253    for (const auto & k : mPipeline) {
254        k->addKernelDeclarations(mMainModule);
255    }
256    for (const auto & k : mPipeline) {
257        k->createInstance();
258    }
259    for (const auto & k : mPipeline) {
260        k->initializeInstance();
261    }
262    if (codegen::pipelineParallel) {
263        generateParallelPipeline(iBuilder, mPipeline);
264    } else if (codegen::segmentPipelineParallel) {
265        generateSegmentParallelPipeline(iBuilder, mPipeline);
266    } else {
267        codegen::ThreadNum = 1;
268        generatePipelineLoop(iBuilder, mPipeline);
269    }
270    for (const auto & k : mPipeline) {
271        k->finalizeInstance();
272    }
273}
274
275void ParabixDriver::addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
276    assert ("addKernelCall or makeKernelCall must be called before addExternalLink" && (kb.getModule() != nullptr));
277    mEngine->addGlobalMapping(cast<Function>(kb.getModule()->getOrInsertFunction(name, type)), functionPtr);
278}
279
280uint64_t file_size(const uint32_t fd) {
281    struct stat st;
282    if (LLVM_UNLIKELY(fstat(fd, &st) != 0)) {
283        st.st_size = 0;
284    }
285    return st.st_size;
286}
287
288void ParabixDriver::linkAndFinalize() {
289    Module * m = mMainModule;
290    #ifndef NDEBUG
291    try {
292    #endif
293    legacy::PassManager PM;
294    #ifndef NDEBUG
295    PM.add(createVerifierPass());
296    #endif
297    PM.add(createReassociatePass());             //Reassociate expressions.
298    PM.add(createGVNPass());                     //Eliminate common subexpressions.
299    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
300    PM.add(createCFGSimplificationPass());
301
302    raw_fd_ostream * IROutputStream = nullptr;
303    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
304        if (codegen::IROutputFilename.empty()) {
305            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
306        } else {
307            std::error_code error;
308            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
309        }
310        PM.add(createPrintModulePass(*IROutputStream));
311    }
312
313    #ifndef USE_LLVM_3_6
314    raw_fd_ostream * ASMOutputStream = nullptr;
315    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
316        if (codegen::ASMOutputFilename.empty()) {
317            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
318        } else {
319            std::error_code error;
320            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
321        }
322        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
323            report_fatal_error("LLVM error: could not add emit assembly pass");
324        }
325    }
326    #endif
327
328    FunctionType * fileSizeType = FunctionType::get(iBuilder->getInt64Ty(), { iBuilder->getInt32Ty() });
329    mEngine->addGlobalMapping(cast<Function>(mMainModule->getOrInsertFunction("file_size", fileSizeType)), (void *)&file_size);
330
331    PM.run(*m);
332    for (kernel::KernelBuilder * const kb : mPipeline) {
333        m = kb->getModule();
334        bool uncachedObject = true;
335        if (mCache) {
336            const std::string moduleID = m->getModuleIdentifier();
337            const std::string signature = kb->generateKernelSignature(moduleID);
338            if (mCache->loadCachedObjectFile(moduleID, signature)) {
339                uncachedObject = false;
340            }
341        }
342        if (uncachedObject) {
343            Module * const cm = iBuilder->getModule();
344            iBuilder->setModule(m);
345            kb->generateKernel();
346            PM.run(*m);
347            iBuilder->setModule(cm);
348        }       
349        mEngine->addModule(std::unique_ptr<Module>(m));
350    }   
351    mEngine->finalizeObject();
352
353    delete IROutputStream;
354    #ifndef USE_LLVM_3_6
355    delete ASMOutputStream;
356    #endif
357    #ifndef NDEBUG
358    } catch (...) { m->dump(); throw; }
359    #endif
360}
361
362void * ParabixDriver::getPointerToMain() {
363    return mEngine->getPointerToNamedFunction("Main");
364}
365
366ParabixDriver::~ParabixDriver() {
367    delete mCache;
368}
Note: See TracBrowser for help on using the repository browser.