Ignore:
Timestamp:
May 22, 2017, 12:14:19 PM (2 years ago)
Author:
nmedfort
Message:

Restructuring work for the Driver classes. Start of work to eliminate the memory leaks with the ExecutionEngine. Replaced custom AlignedMalloc with backend call to std::aligned_malloc. Salvaged some work on DistributionPass for reevaluation.

Location:
icGREP/icgrep-devel/icgrep/toolchain
Files:
4 added
7 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/toolchain/NVPTXDriver.cpp

    r5461 r5464  
    88#include <IR_Gen/idisa_target.h>
    99#include <kernels/kernel_builder.h>
     10#include <kernels/kernel.h>
     11#include <llvm/Transforms/Scalar.h>
     12#include <llvm/Transforms/Utils/Local.h>
    1013#include <toolchain/toolchain.h>
    11 #include <IR_Gen/llvm2ptx.h>
    12 #include <llvm/Transforms/Scalar.h>
    13 
     14#include <toolchain/pipeline.h>
     15#include <llvm/Analysis/TargetLibraryInfo.h>
     16#include <llvm/CodeGen/MIRParser/MIRParser.h>
     17#include <llvm/IR/LegacyPassManager.h>
     18#include <llvm/IR/Module.h>
     19#include <llvm/Support/FileSystem.h>
     20#include <llvm/Support/TargetRegistry.h>
     21#include <llvm/Support/TargetSelect.h>
     22#include <llvm/Support/ToolOutputFile.h>
     23#include <llvm/Target/TargetMachine.h>
    1424
    1525using namespace llvm;
    16 using namespace parabix;
    17 
    18 using Kernel = kernel::Kernel;
    19 using KernelBuilder = kernel::KernelBuilder;
    20 
     26
     27using StreamSetBuffer = parabix::StreamSetBuffer;
    2128
    2229NVPTXDriver::NVPTXDriver(std::string && moduleName)
    23 : mContext(new llvm::LLVMContext())
    24 , mMainModule(new Module(moduleName, *mContext))
    25 , iBuilder(nullptr)
    26 , mTarget(nullptr)
    27 , mEngine(nullptr) {
     30: Driver(std::move(moduleName)) {
    2831
    2932    InitializeAllTargets();
     
    3235    InitializeAllAsmParsers();
    3336
    34     PassRegistry *Registry = PassRegistry::getPassRegistry();
     37    PassRegistry * Registry = PassRegistry::getPassRegistry();
    3538    initializeCore(*Registry);
    3639    initializeCodeGen(*Registry);
     
    4144    mMainModule->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
    4245    mMainModule->setTargetTriple("nvptx64-nvidia-cuda");
    43     codegen::BlockSize = 64;
    4446
    4547    iBuilder.reset(IDISA::GetIDISA_GPU_Builder(*mContext));
     
    4850}
    4951
    50 ExternalBuffer * NVPTXDriver::addExternalBuffer(std::unique_ptr<ExternalBuffer> b) {
    51     mOwnedBuffers.emplace_back(std::move(b));
    52     return cast<ExternalBuffer>(mOwnedBuffers.back().get());
    53 }
    54 
    55 StreamSetBuffer * NVPTXDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
    56     b->allocateBuffer(iBuilder);
    57     mOwnedBuffers.emplace_back(std::move(b));
    58     return mOwnedBuffers.back().get();
    59 }
    60 
    61 kernel::Kernel * NVPTXDriver::addKernelInstance(std::unique_ptr<kernel::Kernel> kb) {
    62     mOwnedKernels.emplace_back(std::move(kb));
    63     return mOwnedKernels.back().get();
    64 }
    65 
    66 void NVPTXDriver::addKernelCall(Kernel & kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
    67     assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
    68     mPipeline.emplace_back(&kb);
    69     kb.bindPorts(inputs, outputs);
    70     kb.setModule(iBuilder, mMainModule);
    71 }
    72 
    73 void NVPTXDriver::makeKernelCall(Kernel * kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
     52void NVPTXDriver::makeKernelCall(kernel::Kernel * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
    7453    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
    75     mPipeline.emplace_back(kb);   
     54    mPipeline.emplace_back(kb);
    7655    kb->bindPorts(inputs, outputs);
    7756    kb->setModule(iBuilder, mMainModule);
     
    11190}
    11291
     92Function * NVPTXDriver::addLinkFunction(Module *, llvm::StringRef, FunctionType *, void *) const {
     93    report_fatal_error("NVPTX does not support linked functions");
     94}
     95
     96
     97static int llvm2ptx(Module * M, std::string PTXFilename) {
     98
     99    std::unique_ptr<MIRParser> MIR;
     100    Triple TheTriple(M->getTargetTriple());
     101
     102    if (TheTriple.getTriple().empty())
     103        TheTriple.setTriple(sys::getDefaultTargetTriple());
     104
     105    // Get the target specific parser.
     106    std::string Error;
     107    const auto TheTarget = TargetRegistry::lookupTarget(codegen::MArch, TheTriple, Error);
     108    if (!TheTarget) {
     109        report_fatal_error(Error);
     110    }
     111
     112    const auto CPUStr = codegen::getCPUStr();
     113    const auto FeaturesStr = codegen::getFeaturesStr();
     114
     115    std::unique_ptr<TargetMachine> Target(
     116                TheTarget->createTargetMachine(TheTriple.getTriple(), CPUStr, FeaturesStr,
     117                                               codegen::Options, codegen::RelocModel, codegen::CMModel, codegen::OptLevel));
     118
     119    assert(Target && "Could not allocate target machine!");
     120
     121    // Figure out where we are going to send the output.
     122    std::error_code EC;
     123    sys::fs::OpenFlags OpenFlags = sys::fs::F_None | sys::fs::F_Text;
     124    std::unique_ptr<tool_output_file> Out = llvm::make_unique<tool_output_file>(PTXFilename, EC, OpenFlags);
     125    if (EC) {
     126        errs() << EC.message() << '\n';
     127        return 1;
     128    }
     129
     130    // Build up all of the passes that we want to do to the module.
     131    legacy::PassManager PM;
     132
     133    // Add an appropriate TargetLibraryInfo pass for the module's triple.
     134    TargetLibraryInfoImpl TLII(Triple(M->getTargetTriple()));
     135
     136    PM.add(new TargetLibraryInfoWrapperPass(TLII));
     137
     138    // Add the target data from the target machine, if it exists, or the module.
     139    M->setDataLayout(Target->createDataLayout());
     140
     141    // Override function attributes based on CPUStr, FeaturesStr, and command line
     142    // flags.
     143    codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M);
     144
     145    {
     146        raw_pwrite_stream *OS = &Out->os();
     147
     148        AnalysisID StartBeforeID = nullptr;
     149        AnalysisID StartAfterID = nullptr;
     150        AnalysisID StopAfterID = nullptr;
     151        const PassRegistry *PR = PassRegistry::getPassRegistry();
     152        if (!codegen::RunPass.empty()) {
     153            if (!codegen::StartAfter.empty() || !codegen::StopAfter.empty()) {
     154                errs() << "start-after and/or stop-after passes are redundant when run-pass is specified.\n";
     155                return 1;
     156            }
     157            const PassInfo *PI = PR->getPassInfo(codegen::RunPass);
     158            if (!PI) {
     159                errs() << "run-pass pass is not registered.\n";
     160                return 1;
     161            }
     162            StopAfterID = StartBeforeID = PI->getTypeInfo();
     163        } else {
     164            if (!codegen::StartAfter.empty()) {
     165                const PassInfo *PI = PR->getPassInfo(codegen::StartAfter);
     166                if (!PI) {
     167                    errs() << "start-after pass is not registered.\n";
     168                    return 1;
     169                }
     170                StartAfterID = PI->getTypeInfo();
     171            }
     172            if (!codegen::StopAfter.empty()) {
     173                const PassInfo *PI = PR->getPassInfo(codegen::StopAfter);
     174                if (!PI) {
     175                    errs() << "stop-after pass is not registered.\n";
     176                    return 1;
     177                }
     178                StopAfterID = PI->getTypeInfo();
     179            }
     180        }
     181
     182        // Ask the target to add backend passes as necessary.
     183        if (Target->addPassesToEmitFile(PM, *OS, codegen::FileType, false, StartBeforeID,
     184                                        StartAfterID, StopAfterID, MIR.get())) {
     185            errs() << " target does not support generation of this file type!\n";
     186            return 1;
     187        }
     188
     189        PM.run(*M);
     190    }
     191    // Declare success.
     192    Out->keep();
     193
     194    return 0;
     195}
     196
    113197void NVPTXDriver::finalizeAndCompile(Function * mainFunc, std::string PTXFilename) {
    114198
     
    134218    PM.run(*mMainModule); 
    135219
    136     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
    137             mMainModule->dump();
     220    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
     221        mMainModule->dump();
     222    }
    138223
    139224    llvm2ptx(mMainModule, PTXFilename);
    140225}
    141226
    142 const std::unique_ptr<kernel::KernelBuilder> & NVPTXDriver::getBuilder() {
    143     return iBuilder;
    144 }
    145 
    146227NVPTXDriver::~NVPTXDriver() {
    147228}
  • icGREP/icgrep-devel/icgrep/toolchain/NVPTXDriver.h

    r5462 r5464  
    77#ifndef NVPTXDRIVER_H
    88#define NVPTXDRIVER_H
    9 #include <string>
    10 #include <IR_Gen/FunctionTypeBuilder.h>
    11 #include <kernels/kernel.h>
    12 #include <kernels/streamset.h>
    139
    14 namespace llvm { class ExecutionEngine; }
    15 namespace llvm { class Function; }
    16 namespace llvm { class Module; }
    17 namespace llvm { class TargetMachine; }
    18 namespace llvm { class formatted_raw_ostream; }
    19 namespace llvm { namespace cl { class OptionCategory; } }
    20 namespace kernel { class Kernel; }
    21 namespace kernel { class KernelBuilder; }
    22 namespace IDISA { class IDISA_Builder; }
     10#include "driver.h"
    2311
    24 class NVPTXDriver {
     12class NVPTXDriver final : public Driver {
    2513    friend class CBuilder;
    2614public:
     
    2816
    2917    ~NVPTXDriver();
     18
     19    void generatePipelineIR() override;
    3020   
    31     const std::unique_ptr<kernel::KernelBuilder> & getBuilder();
    32    
    33     parabix::ExternalBuffer * addExternalBuffer(std::unique_ptr<parabix::ExternalBuffer> b);
    34    
    35     parabix::StreamSetBuffer * addBuffer(std::unique_ptr<parabix::StreamSetBuffer> b);
    36    
    37     kernel::Kernel * addKernelInstance(std::unique_ptr<kernel::Kernel> kb);
    38    
    39     void addKernelCall(kernel::Kernel & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs);
    40 
    41     void makeKernelCall(kernel::Kernel * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs);
    42    
    43     void generatePipelineIR();
    44    
    45     template <typename ExternalFunctionType>
    46     llvm::Function * LinkFunction(kernel::Kernel & kb, llvm::StringRef name, ExternalFunctionType * functionPtr) const;
     21    void makeKernelCall(kernel::Kernel * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) override;
    4722
    4823    void finalizeAndCompile(llvm::Function * mainFunc, std::string PTXFilename);
    49    
     24
    5025    void * getPointerToMain();
    5126
    52 protected:
     27private:
    5328
    54     llvm::Function * LinkFunction(llvm::Module * mod, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
     29    llvm::Function * addLinkFunction(llvm::Module * mod, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const override;
    5530
    56 private:
    57     std::unique_ptr<llvm::LLVMContext>                      mContext;
    58     llvm::Module * const                                    mMainModule;
    59     std::unique_ptr<kernel::KernelBuilder>                  iBuilder;
    60     llvm::TargetMachine *                                   mTarget;
    61     llvm::ExecutionEngine *                                 mEngine;
    62 
    63     std::vector<kernel::Kernel *>                           mPipeline;
    64     // Owned kernels and buffers that will persist with this ParabixDriver instance.
    65     std::vector<std::unique_ptr<kernel::Kernel>>            mOwnedKernels;
    66     std::vector<std::unique_ptr<parabix::StreamSetBuffer>>  mOwnedBuffers;
    6731};
    6832
  • icGREP/icgrep-devel/icgrep/toolchain/object_cache.cpp

    r5440 r5464  
    11#include "object_cache.h"
    22#include <kernels/kernel.h>
     3#include <kernels/kernel_builder.h>
    34#include <llvm/Support/raw_ostream.h>
    45#include <llvm/Support/MemoryBuffer.h>
     6#include <llvm/IR/Metadata.h>
    57#include <llvm/Support/FileSystem.h>
    68#include <llvm/Support/Path.h>
     
    1012#include <fcntl.h>
    1113#include <boost/filesystem.hpp>
     14#include <boost/range/iterator_range.hpp>
    1215#include <ctime>
    1316
     
    5255                          '_'};
    5356
     57const static auto CACHEABLE = "cacheable";
     58
     59const static auto SIGNATURE = "signature";
     60
     61const static boost::uintmax_t CACHE_SIZE_LIMIT = 5 * 1024 * 1024;
     62
     63const MDString * getSignature(const llvm::Module * const M) {
     64    NamedMDNode * const sig = M->getNamedMetadata(SIGNATURE);
     65    if (sig) {
     66        assert ("empty metadata node" && sig->getNumOperands() == 1);
     67        assert ("no signature payload" && sig->getOperand(0)->getNumOperands() == 1);
     68        return cast<MDString>(sig->getOperand(0)->getOperand(0));
     69    }
     70    return nullptr;
     71}
     72
    5473bool ParabixObjectCache::loadCachedObjectFile(const std::unique_ptr<kernel::KernelBuilder> & idb, kernel::Kernel * const kernel) {
    5574    if (LLVM_LIKELY(kernel->isCachable())) {
    56 
    5775        Module * const module = kernel->getModule();
    5876        assert ("kernel module cannot be null!" && module);
    5977        const auto moduleId = module->getModuleIdentifier();
    60 
    6178        // Have we already seen this module before?
    62         if (LLVM_UNLIKELY(mCachedObjectMap.count(moduleId) != 0)) {
    63             const auto f = mKernelSignatureMap.find(moduleId);
    64             if (f == mKernelSignatureMap.end()) {
    65                 return kernel->moduleIDisSignature();
    66             } else if (kernel->moduleIDisSignature() || (kernel->makeSignature(idb) != f->second)) {
    67                 return false;
    68             }
     79        if (LLVM_UNLIKELY(mCachedObject.count(moduleId) != 0)) {
    6980            return true;
    7081        }
     
    7889        auto objectBuffer = MemoryBuffer::getFile(objectName.c_str(), -1, false);
    7990        if (objectBuffer) {
    80             if (!kernel->moduleIDisSignature()) {
     91            if (kernel->hasSignature()) {
    8192                sys::path::replace_extension(objectName, ".sig");
    8293                const auto signatureBuffer = MemoryBuffer::getFile(objectName.c_str(), -1, false);
     
    91102                }
    92103            }
    93             // updae the modified time of the file then add it to our cache
     104            // update the modified time of the file then add it to our cache
    94105            boost::filesystem::last_write_time(objectName.c_str(), time(0));
    95             mCachedObjectMap.emplace(moduleId, std::move(objectBuffer.get()));
     106            mCachedObject.emplace(moduleId, std::move(objectBuffer.get()));
    96107            return true;
    97         } else if (!kernel->moduleIDisSignature()) {
    98             mKernelSignatureMap.emplace(moduleId, kernel->makeSignature(idb));
     108        } else {
     109            // mark this module as cachable
     110            module->getOrInsertNamedMetadata(CACHEABLE);
     111            // if this module has a signature, add it to the metadata
     112            if (kernel->hasSignature()) {
     113                NamedMDNode * const md = module->getOrInsertNamedMetadata(SIGNATURE);
     114                assert (md->getNumOperands() == 0);
     115                MDString * const sig = MDString::get(module->getContext(), kernel->makeSignature(idb));               
     116                md->addOperand(MDNode::get(module->getContext(), {sig}));
     117            }
    99118        }
    100119    }
     
    105124// exists, write it out.
    106125void ParabixObjectCache::notifyObjectCompiled(const Module * M, MemoryBufferRef Obj) {
    107     const auto moduleId = M->getModuleIdentifier();
    108     if (mCachedObjectMap.count(moduleId) == 0) {
    109 
     126    if (M->getNamedMetadata(CACHEABLE)) {
     127        const auto moduleId = M->getModuleIdentifier();
    110128        Path objectName(mCachePath);
    111129        sys::path::append(objectName, CACHE_PREFIX);
     
    122140        outfile.close();
    123141
    124         // If this kernel has a signature, write it.
    125         const auto sig = mKernelSignatureMap.find(moduleId);
    126         if (LLVM_UNLIKELY(sig != mKernelSignatureMap.end())) {
     142        // If this module has a signature, write it.
     143        const MDString * const sig = getSignature(M);
     144        if (sig) {
    127145            sys::path::replace_extension(objectName, ".sig");
    128146            raw_fd_ostream sigfile(objectName, EC, sys::fs::F_None);
    129             sigfile << sig->second;
     147            sigfile << sig->getString();
    130148            sigfile.close();
    131149        }
     
    133151}
    134152
    135 /*  May need this.
    136 
    137 void ParabixObjectCache::removeCacheFile(std::string ModuleID) {
    138     Path CacheName(CacheDir);
    139     if (!getCacheFilename(ModuleID, CacheName)) return;
    140     sys::fs::remove(CacheName);
    141     // Also remove a signature file, if present.
    142     sys::path::replace_extension(CacheName, ".sig");
    143     sys::fs::remove(CacheName);
    144 }
    145 */
    146 
    147 std::unique_ptr<MemoryBuffer> ParabixObjectCache::getObject(const Module* M) {
    148     auto f = mCachedObjectMap.find(M->getModuleIdentifier());
    149     if (f == mCachedObjectMap.end()) {
     153void ParabixObjectCache::cleanUpObjectCacheFiles() {
     154
     155    using namespace boost::filesystem;
     156    using ObjectFile = std::pair<std::time_t, path>;
     157
     158    path cachePath(mCachePath.str());
     159    if (LLVM_LIKELY(is_directory(cachePath))) {
     160        std::vector<ObjectFile> files;
     161        for(const directory_entry & entry : boost::make_iterator_range(directory_iterator(cachePath), {})) {
     162            const auto path = entry.path();;
     163            if (LLVM_LIKELY(is_regular_file(path) && path.has_extension() && path.extension().compare(".o") == 0)) {
     164                files.emplace_back(last_write_time(path), path.filename());
     165            }
     166        }
     167        // sort the files in descending order of last modified (datetime) then file name
     168        std::sort(files.begin(), files.end(), std::greater<ObjectFile>());
     169        boost::uintmax_t cacheSize = 0;
     170        for(const ObjectFile & entry : files) {
     171            auto objectPath = cachePath / std::get<1>(entry);
     172            if (LLVM_LIKELY(exists(objectPath))) {
     173                const auto size = file_size(objectPath);
     174                if ((cacheSize + size) < CACHE_SIZE_LIMIT) {
     175                    cacheSize += size;
     176                } else {
     177                    remove(objectPath);
     178                    objectPath.replace_extension("sig");
     179                    remove(objectPath);
     180                }
     181            }
     182        }
     183    }
     184}
     185
     186std::unique_ptr<MemoryBuffer> ParabixObjectCache::getObject(const Module * module) {
     187    const auto moduleId = module->getModuleIdentifier();
     188    const auto f = mCachedObject.find(moduleId);
     189    if (f == mCachedObject.end()) {
    150190        return nullptr;
    151191    }
     
    157197    // $HOME/.cache/parabix/
    158198    Path cachePath;
    159     // TODO use path::user_cache_directory once we have llvm >= 3.7.
     199    #ifndef USE_LLVM_3_6
     200    sys::path::user_cache_directory(cachePath, "parabix");
     201    #else
    160202    sys::path::home_directory(cachePath);
    161203    sys::path::append(cachePath, ".cache", "parabix");
     204    #endif
    162205    return cachePath;
    163206}
  • icGREP/icgrep-devel/icgrep/toolchain/object_cache.h

    r5440 r5464  
    1111#include <llvm/ExecutionEngine/ObjectCache.h>
    1212#include <llvm/ADT/StringRef.h>
     13#include <boost/container/flat_map.hpp>
     14#include <vector>
    1315#include <string>
    14 #include <boost/container/flat_map.hpp>
    1516
    1617namespace llvm { class Module; }
     
    3637    template <typename K, typename V>
    3738    using Map = boost::container::flat_map<K, V>;
    38     using CacheEntry = std::pair<kernel::Kernel *, std::unique_ptr<llvm::MemoryBuffer>>;
    39     using CacheMap = Map<llvm::Module *, CacheEntry>;
     39    using ModuleCache = Map<std::string, std::unique_ptr<llvm::MemoryBuffer>>;
    4040public:
    4141    ParabixObjectCache();
     
    4343    bool loadCachedObjectFile(const std::unique_ptr<kernel::KernelBuilder> & idb, kernel::Kernel * const kernel);
    4444    void notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj) override;
     45    void cleanUpObjectCacheFiles();
    4546    std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module * M) override;
    4647protected:
    4748    static Path getDefaultPath();
    4849private:
    49     Map<std::string, std::string>                           mKernelSignatureMap;
    50     Map<std::string, std::unique_ptr<llvm::MemoryBuffer>>   mCachedObjectMap;
    51     const Path                                              mCachePath;
     50    ModuleCache     mCachedObject;
     51    const Path      mCachePath;
    5252};
    5353
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r5456 r5464  
    7777    }
    7878    Value * const segOffset = iBuilder->CreateLoad(iBuilder->CreateGEP(threadStruct, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
     79
     80
     81
    7982
    8083    BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", threadFunc);
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp

    r5459 r5464  
    55 */
    66
    7 #include "toolchain.h"
    8 #include <IR_Gen/idisa_target.h>
    9 #include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
    10 #include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
    11 #include <llvm/Support/CommandLine.h>              // for OptionCategory
    12 #include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
    13 #include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
    14 #include <llvm/IR/LegacyPassManager.h>             // for PassManager
    15 #include <llvm/IR/IRPrintingPasses.h>
    16 #include <llvm/InitializePasses.h>                 // for initializeCodeGen
    17 #include <llvm/PassRegistry.h>                     // for PassRegistry
    18 #include <llvm/Support/CodeGen.h>                  // for Level, Level::None
    19 #include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
    20 #include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
    21 #include <llvm/Target/TargetOptions.h>             // for TargetOptions
    22 #include <llvm/Transforms/Scalar.h>
    23 #include <llvm/Transforms/Utils/Local.h>
    24 #include <llvm/IR/Module.h>
    25 #include <toolchain/object_cache.h>
    26 #include <toolchain/pipeline.h>
    27 #include <kernels/kernel_builder.h>
    28 #include <kernels/kernel.h>
    29 #include <sys/stat.h>
    30 #include <llvm/IR/Verifier.h>
    31 #include <toolchain/NVPTXDriver.cpp>
    32 //#include <toolchain/workqueue.h>
    33 
     7#include <toolchain/toolchain.h>
     8#include <llvm/CodeGen/CommandFlags.h>
     9#include <llvm/Support/raw_ostream.h>
    3410
    3511using namespace llvm;
    36 using namespace parabix;
    37 
    38 using Kernel = kernel::Kernel;
    39 using KernelBuilder = kernel::KernelBuilder;
    4012
    4113#ifndef NDEBUG
     
    5931                        clEnumValEnd), cl::cat(CodeGenOptions));
    6032
    61 static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
     33static cl::opt<std::string> IROutputFilenameOption("dump-generated-IR-output", cl::init(""),
     34                                                       cl::desc("output IR filename"), cl::cat(CodeGenOptions));
     35
    6236#ifndef USE_LLVM_3_6
    63 static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
    64 static cl::opt<bool> AsmVerbose("asm-verbose",
    65                                 cl::desc("Add comments to directives."),
    66                                 cl::init(true), cl::cat(CodeGenOptions));
     37static cl::opt<std::string> ASMOutputFilenameOption("asm-output", cl::init(""),
     38                                                    cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
     39
     40static cl::opt<bool> AsmVerbose("asm-verbose", cl::init(true),
     41                                cl::desc("Add comments to directives."), cl::cat(CodeGenOptions));
    6742#endif
    6843
    69 char OptLevel;
    70 static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
    71                               cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
     44static cl::opt<char> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"),
     45                                    cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
    7246
    7347
    74 static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
     48static cl::opt<bool> EnableObjectCacheOption("enable-object-cache",
     49                                             cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
    7550
    76 static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
     51static cl::opt<std::string> ObjectCacheDirOption("object-cache-dir",
     52                                                 cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
    7753
    7854
     55static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0),
     56                                          cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
     57
     58
     59static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize),
     60                                            cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
     61
     62static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::init(1),
     63                                               cl::desc("Buffer Segments"), cl::value_desc("positive integer"));
     64
     65
     66static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::init(2),
     67                                          cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"));
     68
     69
     70static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::init(IN_DEBUG_MODE),
     71                                               cl::desc("Enable Asserts"));
     72
     73static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::init(false),
     74                                                  cl::desc("Count and report CPU cycles per kernel"), cl::cat(CodeGenOptions));
     75
     76static cl::opt<bool, true> pipelineParallelOption("enable-pipeline-parallel", cl::location(pipelineParallel),
     77                                                  cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
     78   
     79static cl::opt<bool, true> segmentPipelineParallelOption("enable-segment-pipeline-parallel", cl::location(segmentPipelineParallel),
     80                                                         cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
     81
     82static cl::opt<bool> USENVPTX("NVPTX", cl::init(false),
     83                              cl::desc("Run on GPU only."));
     84
     85static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::init(256),
     86                                         cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"));
     87
     88
     89const CodeGenOpt::Level OptLevel = [](const char optLevel) {
     90    switch (optLevel) {
     91        case '0': return CodeGenOpt::None;
     92        case '1': return CodeGenOpt::Less;
     93        case '2': return CodeGenOpt::Default;
     94        case '3': return CodeGenOpt::Aggressive;
     95        default: report_fatal_error(optLevel + " is an invalid optimization level.");
     96    }
     97}(OptLevelOption);
     98
     99bool pipelineParallel;
     100bool segmentPipelineParallel;
     101const std::string ASMOutputFilename = ASMOutputFilenameOption;
     102const std::string IROutputFilename = IROutputFilenameOption;
     103const std::string ObjectCacheDir = ObjectCacheDirOption;
    79104int BlockSize;
    80105int SegmentSize;
     
    83108bool EnableAsserts;
    84109bool EnableCycleCounter;
     110const bool EnableObjectCache = EnableObjectCacheOption && (DebugOptions.getBits() == 0);
     111bool NVPTX;
     112int GroupNum;
    85113
    86 static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
    87 static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
    88 static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
    89 static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
    90 static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(IN_DEBUG_MODE));
    91 static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::desc("Count and report CPU cycles per kernel"), cl::init(false), cl::cat(CodeGenOptions));
     114const llvm::Reloc::Model RelocModel = ::RelocModel;
     115const llvm::CodeModel::Model CMModel = ::CMModel;
     116const std::string MArch = ::MArch;
     117const std::string RunPass = ::RunPass;
     118const llvm::TargetMachine::CodeGenFileType FileType = ::FileType;
     119const std::string StopAfter = ::StopAfter;
     120const std::string StartAfter = ::StartAfter;
     121#ifndef USE_LLVM_3_6
     122const TargetOptions Options = [](const bool asmVerbose) {
     123    TargetOptions opt = InitTargetOptionsFromCodeGenFlags();
     124    opt.MCOptions.AsmVerbose = AsmVerbose;
     125    return opt;
     126}(AsmVerbose);
     127#else
     128const TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
     129#endif
    92130
    93 const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
     131const cl::OptionCategory * codegen_flags() {
     132    return &CodeGenOptions;
     133}
    94134
    95 bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
     135bool DebugOptionIsSet(const DebugFlags flag) {
     136    return DebugOptions.isSet(flag);
     137}
    96138
    97 static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
    98    
    99 static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
     139std::string getCPUStr() {
     140    return ::getCPUStr();
     141}
    100142
    101 bool NVPTX;
    102 int GroupNum;
    103 static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
    104 static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
     143std::string getFeaturesStr() {
     144    return ::getFeaturesStr();
     145}
     146
     147void setFunctionAttributes(llvm::StringRef CPU, llvm::StringRef Features, llvm::Module &M) {
     148    return ::setFunctionAttributes(CPU, Features, M);
     149}
     150
    105151
    106152}
    107153
    108 void setNVPTXOption(){
     154void setNVPTXOption() {
    109155    codegen::NVPTX = codegen::USENVPTX;
    110     if(codegen::NVPTX){
    111 #ifndef CUDA_ENABLED
    112     errs() << "CUDA compiler is not supported.\n";
    113     exit(-1);
    114 #endif
     156    if (codegen::NVPTX) {
     157        #ifndef CUDA_ENABLED
     158        report_fatal_error("CUDA compiler is not supported.");
     159        #endif
    115160    }
    116161}
     
    125170}
    126171
    127 void setAllFeatures(EngineBuilder &builder) {
    128     StringMap<bool> HostCPUFeatures;
    129     if (sys::getHostCPUFeatures(HostCPUFeatures)) {
    130         std::vector<std::string> attrs;
    131         for (auto &flag : HostCPUFeatures) {
    132             auto enabled = flag.second ? "+" : "-";
    133             attrs.push_back(enabled + flag.first().str());
    134         }
    135         builder.setMAttrs(attrs);
    136     }
    137 }
    138 
    139172bool AVX2_available() {
    140173    StringMap<bool> HostCPUFeatures;
     
    145178    return false;
    146179}
    147 
    148 ParabixDriver::ParabixDriver(std::string && moduleName)
    149 : mContext(new llvm::LLVMContext())
    150 , mMainModule(new Module(moduleName, *mContext))
    151 , iBuilder(nullptr)
    152 , mTarget(nullptr)
    153 , mEngine(nullptr)
    154 , mCache(nullptr) {
    155 
    156     InitializeNativeTarget();
    157     InitializeNativeTargetAsmPrinter();
    158     InitializeNativeTargetAsmParser();
    159 
    160     PassRegistry * Registry = PassRegistry::getPassRegistry();
    161     initializeCore(*Registry);
    162     initializeCodeGen(*Registry);
    163     initializeLowerIntrinsicsPass(*Registry);
    164 
    165     std::string errMessage;
    166     EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
    167     builder.setUseOrcMCJITReplacement(true);
    168     builder.setErrorStr(&errMessage);
    169     TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
    170     opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
    171     builder.setTargetOptions(opts);
    172     builder.setVerifyModules(false);
    173     CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
    174     switch (codegen::OptLevel) {
    175         case '0': optLevel = CodeGenOpt::None; break;
    176         case '1': optLevel = CodeGenOpt::Less; break;
    177         case '2': optLevel = CodeGenOpt::Default; break;
    178         case '3': optLevel = CodeGenOpt::Aggressive; break;
    179         default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
    180     }
    181     builder.setOptLevel(optLevel);
    182     setAllFeatures(builder);
    183     mEngine = builder.create();
    184     if (mEngine == nullptr) {
    185         throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
    186     }
    187     mTarget = builder.selectTarget();
    188     if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
    189         if (codegen::ObjectCacheDir.empty()) {
    190             mCache = new ParabixObjectCache();
    191         } else {
    192             mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
    193         }
    194         mEngine->setObjectCache(mCache);
    195     }
    196 
    197     mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple());
    198 
    199     iBuilder.reset(IDISA::GetIDISA_Builder(*mContext, mMainModule->getTargetTriple()));
    200     iBuilder->setDriver(this);
    201     iBuilder->setModule(mMainModule);
    202 }
    203 
    204 ExternalBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalBuffer> b) {
    205     mOwnedBuffers.emplace_back(std::move(b));
    206     return cast<ExternalBuffer>(mOwnedBuffers.back().get());
    207 }
    208 
    209 StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
    210     b->allocateBuffer(iBuilder);
    211     mOwnedBuffers.emplace_back(std::move(b));
    212     return mOwnedBuffers.back().get();
    213 }
    214 
    215 Kernel * ParabixDriver::addKernelInstance(std::unique_ptr<Kernel> kb) {
    216     mOwnedKernels.emplace_back(std::move(kb));
    217     return mOwnedKernels.back().get();
    218 }
    219 
    220 void ParabixDriver::addKernelCall(Kernel & kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
    221     assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
    222     mPipeline.emplace_back(&kb);
    223     kb.bindPorts(inputs, outputs);
    224     kb.makeModule(iBuilder);
    225 }
    226 
    227 void ParabixDriver::makeKernelCall(Kernel * kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
    228     assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
    229     mPipeline.emplace_back(kb);   
    230     kb->bindPorts(inputs, outputs);
    231     kb->makeModule(iBuilder);
    232 }
    233 
    234 void ParabixDriver::generatePipelineIR() {
    235     #ifndef NDEBUG
    236     if (LLVM_UNLIKELY(mPipeline.empty())) {
    237         report_fatal_error("Pipeline cannot be empty");
    238     } else {
    239         for (auto i = mPipeline.begin(); i != mPipeline.end(); ++i) {
    240             for (auto j = i; ++j != mPipeline.end(); ) {
    241                 if (LLVM_UNLIKELY(*i == *j)) {
    242                     report_fatal_error("Kernel instances cannot occur twice in the pipeline");
    243                 }
    244             }
    245         }
    246     }
    247     #endif
    248     // note: instantiation of all kernels must occur prior to initialization
    249     for (const auto & k : mPipeline) {
    250         k->addKernelDeclarations(iBuilder);
    251     }
    252     for (const auto & k : mPipeline) {
    253         k->createInstance(iBuilder);
    254     }
    255     for (const auto & k : mPipeline) {
    256         k->initializeInstance(iBuilder);
    257     }
    258     if (codegen::pipelineParallel) {
    259         generateParallelPipeline(iBuilder, mPipeline);
    260     } else if (codegen::segmentPipelineParallel) {
    261         generateSegmentParallelPipeline(iBuilder, mPipeline);
    262     } else {
    263         codegen::ThreadNum = 1;
    264         generatePipelineLoop(iBuilder, mPipeline);
    265     }
    266     for (const auto & k : mPipeline) {
    267         k->finalizeInstance(iBuilder);
    268     }
    269 }
    270 
    271 Function * ParabixDriver::LinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
    272     assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr));
    273     Function * f = cast<Function>(mod->getOrInsertFunction(name, type));
    274     mEngine->addGlobalMapping(f, functionPtr);
    275     return f;
    276 }
    277 
    278 void ParabixDriver::linkAndFinalize() {
    279 
    280     legacy::PassManager PM;
    281     std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
    282     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowUnoptimizedIR))) {
    283         if (codegen::IROutputFilename.empty()) {
    284             IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    285         } else {
    286             std::error_code error;
    287             IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
    288         }
    289         PM.add(createPrintModulePass(*IROutputStream));
    290     }
    291 
    292     if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
    293         PM.add(createVerifierPass());
    294     }
    295     PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
    296     PM.add(createReassociatePass());             //Reassociate expressions.
    297     PM.add(createGVNPass());                     //Eliminate common subexpressions.
    298     PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    299     PM.add(createCFGSimplificationPass());
    300     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
    301         if (LLVM_LIKELY(IROutputStream == nullptr)) {
    302             if (codegen::IROutputFilename.empty()) {
    303                 IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    304             } else {
    305                 std::error_code error;
    306                 IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
    307             }
    308         }
    309         PM.add(createPrintModulePass(*IROutputStream));
    310     }
    311 
    312     #ifndef USE_LLVM_3_6
    313     std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
    314     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
    315         if (codegen::ASMOutputFilename.empty()) {
    316             ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    317         } else {
    318             std::error_code error;
    319             ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
    320         }
    321         if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
    322             report_fatal_error("LLVM error: could not add emit assembly pass");
    323         }
    324     }
    325     #endif
    326 
    327     Module * module = nullptr;
    328 
    329     try {
    330 
    331         for (Kernel * const kernel : mPipeline) {
    332             iBuilder->setKernel(kernel);
    333             module = kernel->getModule();
    334             bool uncachedObject = true;
    335             if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
    336                 uncachedObject = false;
    337             }
    338             if (uncachedObject) {
    339                 module->setTargetTriple(mMainModule->getTargetTriple());
    340                 kernel->generateKernel(iBuilder);
    341                 PM.run(*module);
    342             }
    343             mEngine->addModule(std::unique_ptr<Module>(module));
    344             mEngine->generateCodeForModule(module);
    345         }
    346 
    347         iBuilder->setKernel(nullptr);
    348         module = mMainModule;
    349         PM.run(*mMainModule);
    350 
    351         mEngine->finalizeObject();
    352 
    353     } catch (const std::exception & e) {
    354         report_fatal_error(e.what());
    355     }
    356 
    357 }
    358 
    359 
    360 //void ParabixDriver::linkAndFinalize() {
    361 
    362 //    legacy::PassManager PM;
    363 //    if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
    364 //        PM.add(createVerifierPass());
    365 //    }
    366 //    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
    367 //    PM.add(createReassociatePass());             //Reassociate expressions.
    368 //    PM.add(createGVNPass());                     //Eliminate common subexpressions.
    369 //    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    370 //    PM.add(createCFGSimplificationPass());
    371 
    372 //    unsigned threadCount = 2; //std::thread::hardware_concurrency();
    373 
    374 //    std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
    375 //    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
    376 //        threadCount = 1; // If we're dumping IR, disable separate compilation
    377 //        if (codegen::IROutputFilename.empty()) {
    378 //            IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    379 //        } else {
    380 //            std::error_code error;
    381 //            IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
    382 //        }
    383 //        PM.add(createPrintModulePass(*IROutputStream));
    384 //    }
    385 
    386 //    #ifndef USE_LLVM_3_6
    387 //    std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
    388 //    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
    389 //        threadCount = 1; // If we're dumping ASM, disable separate compilation
    390 //        if (codegen::ASMOutputFilename.empty()) {
    391 //            ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    392 //        } else {
    393 //            std::error_code error;
    394 //            ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
    395 //        }
    396 //        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
    397 //            report_fatal_error("LLVM error: could not add emit assembly pass");
    398 //        }
    399 //    }
    400 //    #endif
    401 
    402 //    Module * module = mMainModule;
    403 //    WorkQueue<Module *> Q(mPipeline.size());
    404 //    std::thread compilation_thread[threadCount - 1];
    405 
    406 //    try {
    407 
    408 //        for (unsigned i = 0; i < (threadCount - 1); ++i) {
    409 //            compilation_thread[i] = std::thread([this, &Q]{
    410 
    411 //                InitializeNativeTarget();
    412 
    413 //                Module * module = nullptr;
    414 //                while (Q.pop(module)) {
    415 //                    mEngine->addModule(std::unique_ptr<Module>(module));
    416 //                    mEngine->generateCodeForModule(module);
    417 //                }
    418 //            });
    419 //        }
    420 
    421 //        module = mMainModule;
    422 //        iBuilder->setKernel(nullptr);
    423 //        PM.run(*mMainModule);
    424 //        Q.push(mMainModule);
    425 
    426 //        for (Kernel * const kernel : mPipeline) {
    427 //            iBuilder->setKernel(kernel);
    428 //            module = kernel->getModule();
    429 //            bool uncachedObject = true;
    430 //            if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
    431 //                uncachedObject = false;
    432 //            }
    433 //            if (uncachedObject) {
    434 //                module->setTargetTriple(mMainModule->getTargetTriple());
    435 //                kernel->generateKernel(iBuilder);
    436 //                PM.run(*module);
    437 //            }
    438 //            Q.push(module);
    439 //        }
    440 
    441 //        for (;;) {
    442 //            if (Q.empty()) {
    443 //                break;
    444 //            } else if (Q.try_pop(module)) {
    445 //                mEngine->addModule(std::unique_ptr<Module>(module));
    446 //                mEngine->generateCodeForModule(module);
    447 //            }
    448 //        }
    449 
    450 //        Q.notify_all();
    451 //        for (unsigned i = 0; i < (threadCount - 1); ++i) {
    452 //            compilation_thread[i].join();
    453 //        }
    454 
    455 //        assert (Q.empty());
    456 
    457 //        mEngine->finalizeObject();
    458 
    459 //    } catch (const std::exception & e) {
    460 //        module->dump();
    461 //        report_fatal_error(e.what());
    462 //    }
    463 
    464 //}
    465 
    466 const std::unique_ptr<KernelBuilder> & ParabixDriver::getBuilder() {
    467     return iBuilder;
    468 }
    469 
    470 void * ParabixDriver::getPointerToMain() {
    471     return mEngine->getPointerToNamedFunction("Main");
    472 }
    473 
    474 ParabixDriver::~ParabixDriver() {
    475     delete mCache;
    476 }
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.h

    r5458 r5464  
    77#ifndef TOOLCHAIN_H
    88#define TOOLCHAIN_H
    9 #include <string>
    10 #include <IR_Gen/FunctionTypeBuilder.h>
    11 #include <kernels/kernel.h>
    12 #include <kernels/streamset.h>
    139
    14 #include <toolchain/NVPTXDriver.h>
    15 namespace llvm { class ExecutionEngine; }
    16 namespace llvm { class Function; }
    17 namespace llvm { class Module; }
    18 namespace llvm { class TargetMachine; }
    19 namespace llvm { class formatted_raw_ostream; }
     10#include <llvm/ADT/StringRef.h>
     11#include <llvm/Support/CodeGen.h>
     12#include <llvm/Target/TargetOptions.h>
     13#include <llvm/Target/TargetMachine.h>
     14
     15// FIXME: llvm/CodeGen/CommandFlags.h can only be included once; otherwise its cl::opt definitions cause
     16// multiple-definition errors. As a workaround for now, the relevant options and functions are made accessible
     17// from here. Re-evaluate with later versions of LLVM.
     18
    2019namespace llvm { namespace cl { class OptionCategory; } }
    21 namespace kernel { class Kernel; }
    22 namespace kernel { class KernelBuilder; }
    23 namespace IDISA { class IDISA_Builder; }
    24 
    25 class ParabixObjectCache;
    2620
    2721namespace codegen {
     22
    2823const llvm::cl::OptionCategory * codegen_flags();
    2924
     
    3934};
    4035
    41 bool DebugOptionIsSet(DebugFlags flag);
     36bool DebugOptionIsSet(const DebugFlags flag);
    4237
    43 
    44 extern char OptLevel;  // set from command line
     38extern bool pipelineParallel;
     39extern bool segmentPipelineParallel;
     40#ifndef USE_LLVM_3_6
     41extern const std::string ASMOutputFilename;
     42#endif
     43extern const std::string IROutputFilename;
     44extern const std::string ObjectCacheDir;
     45extern const llvm::CodeGenOpt::Level OptLevel;  // set from command line
    4546extern int BlockSize;  // set from command line
    4647extern int SegmentSize;  // set from command line
    4748extern int BufferSegments;
    4849extern int ThreadNum;
     50extern const bool EnableObjectCache;
    4951extern bool EnableAsserts;
    5052extern bool EnableCycleCounter;
    5153extern bool NVPTX;
    5254extern int GroupNum;
     55extern const llvm::TargetOptions Options;
     56extern const llvm::Reloc::Model RelocModel;
     57extern const llvm::CodeModel::Model CMModel;
     58extern const std::string MArch;
     59extern const std::string RunPass;
     60extern const llvm::TargetMachine::CodeGenFileType FileType;
     61extern const std::string StopAfter;
     62extern const std::string StartAfter;
     63
     64std::string getCPUStr();
     65std::string getFeaturesStr();
     66void setFunctionAttributes(llvm::StringRef CPU, llvm::StringRef Features, llvm::Module &M);
     67
    5368}
    5469
     
    6075bool AVX2_available();
    6176
    62 class ParabixDriver {
    63     friend class CBuilder;
    64 public:
    65     ParabixDriver(std::string && moduleName);
    66 
    67     ~ParabixDriver();
    68    
    69     const std::unique_ptr<kernel::KernelBuilder> & getBuilder();
    70    
    71     parabix::ExternalBuffer * addExternalBuffer(std::unique_ptr<parabix::ExternalBuffer> b);
    72    
    73     parabix::StreamSetBuffer * addBuffer(std::unique_ptr<parabix::StreamSetBuffer> b);
    74    
    75     kernel::Kernel * addKernelInstance(std::unique_ptr<kernel::Kernel> kb);
    76    
    77     void addKernelCall(kernel::Kernel & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs);
    78 
    79     void makeKernelCall(kernel::Kernel * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs);
    80    
    81     void generatePipelineIR();
    82    
    83     template <typename ExternalFunctionType>
    84     llvm::Function * LinkFunction(kernel::Kernel & kb, llvm::StringRef name, ExternalFunctionType * functionPtr) const;
    85 
    86     void linkAndFinalize();
    87    
    88     void * getPointerToMain();
    89 
    90 protected:
    91 
    92     llvm::Function * LinkFunction(llvm::Module * mod, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
    93 
    94 private:
    95     std::unique_ptr<llvm::LLVMContext>                      mContext;
    96     llvm::Module * const                                    mMainModule;
    97     std::unique_ptr<kernel::KernelBuilder>                  iBuilder;
    98     llvm::TargetMachine *                                   mTarget;
    99     llvm::ExecutionEngine *                                 mEngine;
    100     ParabixObjectCache *                                    mCache;
    101 
    102     std::vector<kernel::Kernel *>                           mPipeline;
    103     // Owned kernels and buffers that will persist with this ParabixDriver instance.
    104     std::vector<std::unique_ptr<kernel::Kernel>>            mOwnedKernels;
    105     std::vector<std::unique_ptr<parabix::StreamSetBuffer>>  mOwnedBuffers;
    106 };
    107 
    108 template <typename ExternalFunctionType>
    109 llvm::Function * ParabixDriver::LinkFunction(kernel::Kernel & kb, llvm::StringRef name, ExternalFunctionType * functionPtr) const {
    110     llvm::FunctionType * const type = FunctionTypeBuilder<ExternalFunctionType>::get(*mContext.get());
    111     assert ("FunctionTypeBuilder did not resolve a function type." && type);
    112     return LinkFunction(kb.getModule(), name, type, reinterpret_cast<void *>(functionPtr));
    113 }
    114 
    11577#endif
Note: See TracChangeset for help on using the changeset viewer.