Ignore:
Timestamp:
May 22, 2017, 12:14:19 PM (2 years ago)
Author:
nmedfort
Message:

Restructuring work for the Driver classes. Start of work to eliminate the memory leaks with the ExecutionEngine?. Replaced custom AlignedMalloc? with backend call to std::aligned_malloc. Salvaged some work on DistributionPass? for reevaluation.

Location:
icGREP/icgrep-devel/icgrep
Files:
5 added
1 deleted
51 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5459 r5464  
    5959SET(OBJECT_CACHE_SRC toolchain/object_cache.cpp)
    6060
    61 SET(TOOLCHAIN_SRC toolchain/toolchain.cpp  toolchain/pipeline.cpp ${OBJECT_CACHE_SRC})
     61SET(TOOLCHAIN_SRC toolchain/toolchain.cpp toolchain/pipeline.cpp toolchain/driver.cpp)
     62
     63SET(DRIVER_SRC toolchain/cpudriver.cpp toolchain/NVPTXDriver.cpp)
    6264
    6365SET(KERNEL_SRC kernels/kernel.cpp kernels/streamset.cpp kernels/interface.cpp kernels/kernel_builder.cpp)
     
    7072SET(PABLO_SRC ${PABLO_SRC} pablo/analysis/pabloverifier.cpp)
    7173SET(PABLO_SRC ${PABLO_SRC} pablo/passes/ssapass.cpp)
    72 SET(PABLO_SRC ${PABLO_SRC} pablo/optimizers/pablo_simplifier.cpp pablo/optimizers/codemotionpass.cpp pablo/passes/flattenif.cpp)
     74SET(PABLO_SRC ${PABLO_SRC} pablo/optimizers/pablo_simplifier.cpp pablo/optimizers/codemotionpass.cpp pablo/optimizers/distributivepass.cpp pablo/passes/flattenif.cpp)
    7375IF(ENABLE_MULTIPLEXING)
    7476SET(PABLO_SRC ${PABLO_SRC} pablo/optimizers/booleanreassociationpass.cpp)
     
    7678ENDIF()
    7779
    78 add_library(CodeGen ${TOOLCHAIN_SRC} ${KERNEL_SRC} ${IDISA_SRC})
     80add_library(CodeGen ${TOOLCHAIN_SRC} ${DRIVER_SRC} ${OBJECT_CACHE_SRC} ${KERNEL_SRC} ${IDISA_SRC})
    7981add_library(PabloADT ${PABLO_SRC})
    8082add_library(RegExpADT re/re_re.cpp re/re_cc.cpp re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/printer_re.cpp)
     
    8890
    8991# add the executable
    90 target_link_libraries (PabloADT CodeGen ${REQ_LLVM_LIBRARIES})
     92target_link_libraries (CodeGen ${REQ_LLVM_LIBRARIES})
     93target_link_libraries (PabloADT CodeGen)
    9194target_link_libraries (CCADT PabloADT)
    9295target_link_libraries (UCDlib RegExpADT PabloADT CCADT)
     
    206209
    207210SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG")
    208 SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g -fno-omit-frame-pointer")
     211SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -g -fno-omit-frame-pointer -fsanitize=address")
    209212
    210213add_test(
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5454 r5464  
    1313#include <llvm/Support/raw_ostream.h>
    1414#include <toolchain/toolchain.h>
     15#include <toolchain/driver.h>
     16#include <stdlib.h>
    1517#include <sys/mman.h>
     18#include <unistd.h>
    1619
    1720using namespace llvm;
     
    218221Value * CBuilder::CreateMalloc(Value * size) {
    219222    Module * const m = getModule();
    220     DataLayout DL(m);
    221     IntegerType * const intTy = getIntPtrTy(DL);
    222     if (size->getType() != intTy) {
    223         if (isa<Constant>(size)) {
    224             size = ConstantExpr::getIntegerCast(cast<Constant>(size), intTy, false);
    225         } else {
    226             size = CreateZExtOrTrunc(size, intTy);
    227         }
    228     }   
    229     PointerType * const voidPtrTy = getVoidPtrTy();
     223    IntegerType * const sizeTy = getSizeTy();   
    230224    Function * malloc = m->getFunction("malloc");
    231225    if (malloc == nullptr) {
    232         FunctionType * fty = FunctionType::get(voidPtrTy, {intTy}, false);
     226//        malloc = LinkFunction("malloc", &std::malloc);
     227        PointerType * const voidPtrTy = getVoidPtrTy();
     228        FunctionType * fty = FunctionType::get(voidPtrTy, {sizeTy}, false);
    233229        malloc = Function::Create(fty, Function::ExternalLinkage, "malloc", m);
    234230        malloc->setCallingConv(CallingConv::C);
    235231        malloc->setDoesNotAlias(0);
    236232    }
    237     assert (size->getType() == intTy);
    238     CallInst * ci = CreateCall(malloc, size); assert (ci);
    239     ci->setTailCall();
    240     ci->setCallingConv(malloc->getCallingConv());
    241     Value * ptr = CreatePointerCast(ci, voidPtrTy); assert (ptr);
    242     CreateAssert(ptr, "FATAL ERROR: out of memory");
     233    size = CreateZExtOrTrunc(size, sizeTy);
     234    CallInst * const ptr = CreateCall(malloc, size);
     235    ptr->setTailCall();
     236    CreateAssert(ptr, "CreateMalloc: returned null pointer");
    243237    return ptr;
    244238}
     
    249243    }
    250244    Module * const m = getModule();
    251     DataLayout DL(m);
    252     IntegerType * const intTy = getIntPtrTy(DL);
    253     Function * aligned_malloc = m->getFunction("aligned_malloc" + std::to_string(alignment));
     245    Function * aligned_malloc = m->getFunction("aligned_alloc");
     246    IntegerType * const sizeTy = getSizeTy();
    254247    if (LLVM_UNLIKELY(aligned_malloc == nullptr)) {
    255         const auto ip = saveIP();
    256248        PointerType * const voidPtrTy = getVoidPtrTy();
    257         FunctionType * fty = FunctionType::get(voidPtrTy, {intTy}, false);
    258         aligned_malloc = Function::Create(fty, Function::InternalLinkage, "aligned_malloc" + std::to_string(alignment), m);
     249        FunctionType * const fty = FunctionType::get(voidPtrTy, {sizeTy, sizeTy}, false);
     250        aligned_malloc = Function::Create(fty, Function::ExternalLinkage, "aligned_alloc", m);
    259251        aligned_malloc->setCallingConv(CallingConv::C);
    260252        aligned_malloc->setDoesNotAlias(0);
    261         aligned_malloc->addFnAttr(Attribute::AlwaysInline);
    262         Value * size = &*aligned_malloc->arg_begin();
    263         SetInsertPoint(BasicBlock::Create(getContext(), "entry", aligned_malloc));
    264         const auto byteWidth = (intTy->getBitWidth() / 8);
    265         Constant * const offset = ConstantInt::get(intTy, alignment + byteWidth - 1);
    266         size = CreateAdd(size, offset);
    267         Value * unaligned = CreatePtrToInt(CreateMalloc(size), intTy);
    268         Value * aligned = CreateAnd(CreateAdd(unaligned, offset), ConstantExpr::getNot(ConstantInt::get(intTy, alignment - 1)));
    269         Value * prefix = CreateIntToPtr(CreateSub(aligned, ConstantInt::get(intTy, byteWidth)), intTy->getPointerTo());
    270         assert (unaligned->getType() == prefix->getType()->getPointerElementType());
    271         CreateAlignedStore(unaligned, prefix, byteWidth);
    272         CreateRet(CreateIntToPtr(aligned, voidPtrTy));
    273         restoreIP(ip);
    274     }
    275     return CreateCall(aligned_malloc, {CreateZExtOrTrunc(size, intTy)});
     253
     254        // aligned_malloc = LinkFunction("aligned_alloc", &aligned_alloc);
     255    }
     256
     257    size = CreateZExtOrTrunc(size, sizeTy);
     258    ConstantInt * const align = ConstantInt::get(sizeTy, alignment);
     259    if (codegen::EnableAsserts) {
     260        CreateAssert(CreateICmpEQ(CreateURem(size, align), ConstantInt::get(sizeTy, 0)),
     261                     "CreateAlignedMalloc: size must be an integral multiple of alignment.");
     262    }
     263    Value * const ptr = CreateCall(aligned_malloc, {align, size});
     264    CreateAssert(ptr, "CreateAlignedMalloc: returned null pointer.");
     265    return ptr;
    276266}
    277267
     
    440430}
    441431
    442 void CBuilder::CreateFree(Value * const ptr) {
     432void CBuilder::CreateFree(Value * ptr) {
    443433    assert (ptr->getType()->isPointerTy());
    444434    Module * const m = getModule();
     435    Type * const voidPtrTy =  getVoidPtrTy();
     436    Function * f = m->getFunction("free");
     437    if (f == nullptr) {
     438        FunctionType * fty = FunctionType::get(getVoidTy(), {voidPtrTy}, false);
     439        f = Function::Create(fty, Function::ExternalLinkage, "free", m);
     440        f->setCallingConv(CallingConv::C);
     441        // f = LinkFunction("free", &std::free);
     442    }
     443    ptr = CreatePointerCast(ptr, voidPtrTy);
     444    CreateCall(f, ptr)->setTailCall();
     445}
     446
     447Value * CBuilder::CreateRealloc(Value * const ptr, Value * size) {
     448    Module * const m = getModule();
     449    IntegerType * const sizeTy = getSizeTy();
    445450    PointerType * const voidPtrTy = getVoidPtrTy();
    446     Function * free = m->getFunction("free");
    447     if (free == nullptr) {
    448         FunctionType * fty = FunctionType::get(getVoidTy(), {voidPtrTy}, false);
    449         Module * const m = getModule();
    450         free = Function::Create(fty, Function::ExternalLinkage, "free", m);
    451         free->setCallingConv(CallingConv::C);
    452     }
    453     CallInst * const ci = CreateCall(free, CreatePointerCast(ptr, voidPtrTy));
     451    Function * f = m->getFunction("realloc");
     452    if (f == nullptr) {
     453        FunctionType * fty = FunctionType::get(voidPtrTy, {voidPtrTy, sizeTy}, false);
     454        f = Function::Create(fty, Function::ExternalLinkage, "realloc", m);
     455        f->setCallingConv(CallingConv::C);
     456        f->setDoesNotAlias(0);
     457        f->setDoesNotAlias(1);
     458        // f = LinkFunction("realloc", &realloc);
     459    }
     460    Value * const addr = CreatePointerCast(ptr, voidPtrTy);
     461    size = CreateZExtOrTrunc(size, sizeTy);
     462    CallInst * const ci = CreateCall(f, {addr, size});
    454463    ci->setTailCall();
    455     ci->setCallingConv(free->getCallingConv());
    456 }
    457 
    458 void CBuilder::CreateAlignedFree(Value * const ptr, const bool testForNullAddress) {
    459     // WARNING: this will segfault if the value of the ptr at runtime is null but testForNullAddress was not set
    460     PointerType * type = cast<PointerType>(ptr->getType());
    461     BasicBlock * exit = nullptr;
    462     if (testForNullAddress) {
    463         LLVMContext & C = getContext();
    464         BasicBlock * bb = GetInsertBlock();
    465         Function * f = bb->getParent();
    466         exit = BasicBlock::Create(C, "", f, bb);
    467         BasicBlock * entry = BasicBlock::Create(C, "", f, exit);
    468         Value * cond = CreateICmpEQ(ptr, ConstantPointerNull::get(type));
    469         CreateCondBr(cond, exit, entry);
    470         SetInsertPoint(entry);
    471     }
    472     DataLayout DL(getModule());
    473     IntegerType * const intTy = getIntPtrTy(DL);
    474     const auto byteWidth = (intTy->getBitWidth() / 8);
    475     Value * prefix = CreatePtrToInt(ptr, intTy);
    476     prefix = CreateSub(prefix, ConstantInt::get(intTy, byteWidth));
    477     prefix = CreateIntToPtr(prefix, intTy->getPointerTo());
    478     prefix = CreateIntToPtr(CreateAlignedLoad(prefix, byteWidth), type);
    479     CreateFree(prefix);
    480     if (testForNullAddress) {
    481         CreateBr(exit);
    482         SetInsertPoint(exit);
    483     }
    484 }
    485 
    486 Value * CBuilder::CreateRealloc(Value * ptr, Value * size) {
    487     Module * const m = getModule();
    488     DataLayout DL(m);
    489     IntegerType * const intTy = getIntPtrTy(DL);
    490     PointerType * type = cast<PointerType>(ptr->getType());
    491     if (size->getType() != intTy) {
    492         if (isa<Constant>(size)) {
    493             size = ConstantExpr::getIntegerCast(cast<Constant>(size), intTy, false);
    494         } else {
    495             size = CreateZExtOrTrunc(size, intTy);
    496         }
    497     }
    498     Function * realloc = m->getFunction("realloc");
    499     if (realloc == nullptr) {
    500         PointerType * const voidPtrTy = getVoidPtrTy();
    501         FunctionType * fty = FunctionType::get(voidPtrTy, {voidPtrTy, intTy}, false);       
    502         realloc = Function::Create(fty, Function::ExternalLinkage, "realloc", m);
    503         realloc->setCallingConv(CallingConv::C);
    504         realloc->setDoesNotAlias(1);
    505     }
    506     assert (size->getType() == intTy);
    507     CallInst * ci = CreateCall(realloc, {ptr, size});
    508     ci->setTailCall();
    509     ci->setCallingConv(realloc->getCallingConv());
    510     return CreateBitOrPointerCast(ci, type);
     464    return CreatePointerCast(ci, ptr->getType());
    511465}
    512466
    513467PointerType * CBuilder::getVoidPtrTy() const {
    514     return TypeBuilder<void *, true>::get(getContext());
     468    return TypeBuilder<void *, false>::get(getContext());
    515469}
    516470
     
    776730Function * CBuilder::LinkFunction(llvm::StringRef name, FunctionType * type, void * functionPtr) const {
    777731    assert (mDriver);
    778     return mDriver->LinkFunction(getModule(), name, type, functionPtr);
     732    return mDriver->addLinkFunction(getModule(), name, type, functionPtr);
    779733}
    780734
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5440 r5464  
    2222namespace llvm { class Value; }
    2323
    24 class ParabixDriver;
     24class Driver;
    2525
    2626class CBuilder : public llvm::IRBuilder<> {
    27     friend class ParabixDriver;
    2827public:
    2928
     
    5453   
    5554    void CreateFree(llvm::Value * const ptr);
    56    
    57     void CreateAlignedFree(llvm::Value * const ptr, const bool testForNullAddress = false);
    58    
     55
    5956    llvm::Value * CreateRealloc(llvm::Value * ptr, llvm::Value * size);
    6057
     
    6764        instr->setAlignment(getCacheAlignment());
    6865        return instr;
     66    }
     67
     68    llvm::Value * CreateCacheAlignedMalloc(llvm::Value * size) {
     69        return CreateAlignedMalloc(size, getCacheAlignment());
    6970    }
    7071
     
    188189
    189190    virtual bool supportsIndirectBr() const {
    190         return true;
     191        return false;
    191192    }
    192193
     
    215216    llvm::Function * LinkFunction(llvm::StringRef name, ExternalFunctionType * functionPtr) const;
    216217
     218    void setDriver(Driver * const driver) {
     219        mDriver = driver;
     220    }
     221
    217222protected:
    218223
    219224    llvm::Function * LinkFunction(llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
    220 
    221     void setDriver(ParabixDriver * driver) {
    222         mDriver = driver;
    223     }
    224225
    225226protected:
     
    229230    llvm::IntegerType *             mSizeType;
    230231    llvm::StructType *              mFILEtype;
    231     ParabixDriver *                 mDriver;
     232    Driver *                        mDriver;
    232233    llvm::LLVMContext               mContext;
    233234    const std::string               mTriple;
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5440 r5464  
    66
    77#include "idisa_avx_builder.h"
     8
     9using namespace llvm;
    810
    911namespace IDISA {
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.h

    r5436 r5464  
    88
    99#include <IR_Gen/idisa_sse_builder.h>
    10 
    11 using namespace llvm;
    1210
    1311namespace IDISA {
     
    2523    virtual std::string getBuilderUniqueName() override;
    2624
    27     Value * hsimd_signmask(unsigned fw, Value * a) override;
     25    llvm::Value * hsimd_signmask(unsigned fw, llvm::Value * a) override;
    2826
    2927    ~IDISA_AVX_Builder() {}
     
    4139
    4240    virtual std::string getBuilderUniqueName() override;
    43     Value * hsimd_packh(unsigned fw, Value * a, Value * b) override;
    44     Value * hsimd_packl(unsigned fw, Value * a, Value * b) override;
    45     Value * esimd_mergeh(unsigned fw, Value * a, Value * b) override;
    46     Value * esimd_mergel(unsigned fw, Value * a, Value * b) override;
    47     Value * hsimd_packh_in_lanes(unsigned lanes, unsigned fw, Value * a, Value * b) override;
    48     Value * hsimd_packl_in_lanes(unsigned lanes, unsigned fw, Value * a, Value * b) override;
    49     std::pair<Value *, Value *> bitblock_add_with_carry(Value * a, Value * b, Value * carryin) override;
     41    llvm::Value * hsimd_packh(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     42    llvm::Value * hsimd_packl(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     43    llvm::Value * esimd_mergeh(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     44    llvm::Value * esimd_mergel(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     45    llvm::Value * hsimd_packh_in_lanes(unsigned lanes, unsigned fw, llvm::Value * a, llvm::Value * b) override;
     46    llvm::Value * hsimd_packl_in_lanes(unsigned lanes, unsigned fw, llvm::Value * a, llvm::Value * b) override;
     47    std::pair<llvm::Value *, llvm::Value *> bitblock_add_with_carry(llvm::Value * a, llvm::Value * b, llvm::Value * carryin) override;
    5048
    5149    ~IDISA_AVX2_Builder() {}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r5458 r5464  
    108108    virtual llvm::Value * bitblock_set_bit(llvm::Value * pos);
    109109
    110     virtual void CreateBaseFunctions(){};
     110    virtual void CreateBaseFunctions() {}
    111111   
    112112    llvm::Value * simd_and(llvm::Value * a, llvm::Value * b);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_i64_builder.cpp

    r5374 r5464  
    66
    77#include "idisa_i64_builder.h"
     8
     9using namespace llvm;
    810
    911namespace IDISA {
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_i64_builder.h

    r5436 r5464  
    88 */
    99#include <IR_Gen/idisa_builder.h>
    10 
    11 using namespace llvm;
    1210
    1311namespace IDISA {
     
    2321    virtual std::string getBuilderUniqueName() override;
    2422
    25     Value * hsimd_packh(unsigned fw, Value * a, Value * b) override;
    26     Value * hsimd_packl(unsigned fw, Value * a, Value * b) override;
     23    llvm::Value * hsimd_packh(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     24    llvm::Value * hsimd_packl(unsigned fw, llvm::Value * a, llvm::Value * b) override;
    2725    ~IDISA_I64_Builder() {}
    2826
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp

    r5440 r5464  
    88#include <llvm/IR/InlineAsm.h>
    99#include <llvm/IR/Module.h>
     10
     11using namespace llvm;
    1012
    1113namespace IDISA {
     
    276278}
    277279
    278    
    279 }
     280void IDISA_NVPTX20_Builder::CreateBaseFunctions() {
     281    CreateGlobals();
     282    CreateBuiltinFunctions();
     283    CreateLongAdvanceFunc();
     284    CreateLongAddFunc();
     285    CreateBallotFunc();
     286}
     287
     288   
     289}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.h

    r5458 r5464  
    88
    99#include <IR_Gen/idisa_i64_builder.h>
    10 
    11 using namespace llvm;
    1210
    1311namespace IDISA {
     
    2422
    2523    ~IDISA_NVPTX20_Builder() {}
     24
    2625    virtual std::string getBuilderUniqueName() override;
     26
    2727    int getGroupThreads();
    2828
    29     void CreateBaseFunctions() override {
    30         CreateGlobals();
    31         CreateBuiltinFunctions();
    32         CreateLongAdvanceFunc();
    33         CreateLongAddFunc();
    34         CreateBallotFunc();
    35     };
     29    void CreateBaseFunctions() override;
    3630   
    37     Value * bitblock_any(Value * a) override;
    38     std::pair<Value *, Value *> bitblock_add_with_carry(Value * a, Value * b, Value * carryin) override;
    39     virtual std::pair<Value *, Value *> bitblock_advance(Value * a, Value * shiftin, unsigned shift) override;
    40     Value * bitblock_mask_from(Value * pos) override;
    41     Value * bitblock_set_bit(Value * pos) override;
     31    llvm::Value * bitblock_any(llvm::Value * a) override;
     32    std::pair<llvm::Value *, llvm::Value *> bitblock_add_with_carry(llvm::Value * a, llvm::Value * b, llvm::Value * carryin) override;
     33    virtual std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift) override;
     34    llvm::Value * bitblock_mask_from(llvm::Value * pos) override;
     35    llvm::Value * bitblock_set_bit(llvm::Value * pos) override;
    4236
    43     Value * getEOFMask(Value * remainingBytes);
     37    llvm::Value * getEOFMask(llvm::Value * remainingBytes);
    4438
    45     Value * Advance(const unsigned index, const unsigned shiftAmount, Value * const value);
    46     Value * LongAdd(Value * const valA, Value * const valB, Value * carryIn);
     39    llvm::Value * Advance(const unsigned index, const unsigned shiftAmount, llvm::Value * const value);
     40    llvm::Value * LongAdd(llvm::Value * const valA, llvm::Value * const valB, llvm::Value * carryIn);
    4741
    48     LoadInst * CreateAtomicLoadAcquire(Value * ptr) override;
    49     StoreInst * CreateAtomicStoreRelease(Value * val, Value * ptr) override;
     42    llvm::LoadInst * CreateAtomicLoadAcquire(llvm::Value * ptr) override;
     43    llvm::StoreInst * CreateAtomicStoreRelease(llvm::Value * val, llvm::Value * ptr) override;
    5044
    5145    bool supportsIndirectBr() const final {
     
    5448
    5549private:
     50
    5651    void CreateGlobals();
    5752    void CreateBuiltinFunctions();
     
    6055    void CreateBallotFunc();
    6156
    62     int                                 groupThreads;
    63     Function *                          barrierFunc;
    64     Function *                          tidFunc;
    65     Function *                          mLongAdvanceFunc;
    66     Function *                          mLongAddFunc;
    67     GlobalVariable*                     carry;
    68     GlobalVariable*                     bubble;
     57private:
     58    int                         groupThreads;
     59    llvm::Function *            barrierFunc;
     60    llvm::Function *            tidFunc;
     61    llvm::Function *            mLongAdvanceFunc;
     62    llvm::Function *            mLongAddFunc;
     63    llvm::GlobalVariable*       carry;
     64    llvm::GlobalVariable*       bubble;
    6965};
    7066
     
    7470    IDISA_NVPTX35_Builder(Module * m, int groupSize) : IDISA_NVPTX30_Builder(m, groupSize) {}
    7571   
    76     std::pair<Value *, Value *> bitblock_advance(Value * a, Value * shiftin, unsigned shift) override;
     72    std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift) override;
    7773
    7874    ~IDISA_NVPTX35_Builder() {};
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r5440 r5464  
    66
    77#include "idisa_sse_builder.h"
     8
     9using namespace llvm;
    810
    911namespace IDISA {
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.h

    r5436 r5464  
    99
    1010#include <IR_Gen/idisa_builder.h>
    11 
    12 using namespace llvm;
    1311
    1412namespace IDISA {
     
    2321
    2422    virtual std::string getBuilderUniqueName() override;
    25     Value * hsimd_signmask(unsigned fw, Value * a) override;
     23    llvm::Value * hsimd_signmask(unsigned fw, llvm::Value * a) override;
    2624    ~IDISA_SSE_Builder() {}
    2725
     
    3836
    3937    virtual std::string getBuilderUniqueName() override;
    40     Value * hsimd_signmask(unsigned fw, Value * a) override;
    41     Value * hsimd_packh(unsigned fw, Value * a, Value * b) override;
    42     Value * hsimd_packl(unsigned fw, Value * a, Value * b) override;
    43     std::pair<Value *, Value *> bitblock_advance(Value * a, Value * shiftin, unsigned shift) final;
     38    llvm::Value * hsimd_signmask(unsigned fw, llvm::Value * a) override;
     39    llvm::Value * hsimd_packh(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     40    llvm::Value * hsimd_packl(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     41    std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift) final;
    4442
    4543    ~IDISA_SSE2_Builder() {}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_target.cpp

    r5458 r5464  
    2121KernelBuilder * GetIDISA_Builder(llvm::LLVMContext & C, const std::string & targetTriple) {
    2222    unsigned registerWidth = 0;
    23     Triple T(targetTriple);
     23    llvm::Triple T(targetTriple);
    2424    if (T.isArch64Bit()) {
    2525        registerWidth = 64;
  • icGREP/icgrep-devel/icgrep/IR_Gen/tracegen.h

    r5398 r5464  
    88#include "idisa_builder.h"
    99#include <string>
    10 #include <llvm/IR/Module.h>
    11 #include <llvm/IR/Constants.h>
    12 #include <llvm/IR/Intrinsics.h>
    13 #include <llvm/IR/Function.h>
    14 
    15 
    1610
    1711class TraceTool {
     
    3428};
    3529
    36 using namespace llvm;
    37 
    38 TraceTool::TraceTool(IDISA::IDISA_Builder * b, unsigned log2TraceBufSize) :
    39     iBuilder(b),
    40     mLog2TraceBufSize(log2TraceBufSize),
    41     mTraceVarCount(0) {
    42 
    43     Type * entryType = StructType::get(iBuilder->getInt8Ty()->getPointerTo(), iBuilder->getSizeTy(), nullptr);
    44     Type * bufferType = ArrayType::get(entryType, 1 << mLog2TraceBufSize);
    45     mTraceBufferPtr = iBuilder->CreateAlloca(bufferType);
    46     mTraceIndexPtr = iBuilder->CreateAlloca(iBuilder->getInt32Ty());
    47     iBuilder->CreateStore(ConstantInt::getNullValue(iBuilder->getInt32Ty()), mTraceIndexPtr);
    48     mTraceIndexMask = ConstantInt::get(iBuilder->getInt32Ty(), (1 << mLog2TraceBufSize) - 1);
    49 }
    50 
    51 unsigned TraceTool::newTraceVar(std::string traceName) {
    52     std::string formatString = traceName + " = %" PRIx64 "\n";
    53     mTraceFormatString.push_back(iBuilder->GetString(formatString.c_str()));
    54     return mTraceVarCount++;
    55 }
    56 
    57 void TraceTool::addTraceEntry(unsigned traceVar, llvm::Value * traceVal) {
    58    
    59     Value * traceIndex = iBuilder->CreateLoad(mTraceIndexPtr);
    60     Value * entryVarPtr = iBuilder->CreateGEP(mTraceBufferPtr, {iBuilder->getInt32(0), traceIndex, iBuilder->getInt32(0)});
    61     iBuilder->CreateStore(mTraceFormatString[traceVar], entryVarPtr);
    62     Value * entryValPtr = iBuilder->CreateGEP(mTraceBufferPtr, {iBuilder->getInt32(0), traceIndex, iBuilder->getInt32(1)});
    63     iBuilder->CreateStore(iBuilder->CreateZExt(traceVal, iBuilder->getSizeTy()), entryValPtr);
    64     iBuilder->CreateStore(iBuilder->CreateAnd(mTraceIndexMask, iBuilder->CreateAdd(traceIndex, iBuilder->getInt32(1))), mTraceIndexPtr);
    65 }
    66 
    67 void TraceTool::createDumpTrace() {
    68     Constant * traceBufSize = ConstantInt::get(iBuilder->getInt32Ty(), 1<<mLog2TraceBufSize);
    69     Function * printF = iBuilder->GetPrintf();
    70     BasicBlock * DumpEntryBlock = iBuilder->GetInsertBlock();
    71     Function * currentFn = DumpEntryBlock->getParent();
    72     BasicBlock * DumpTraceLoop = BasicBlock::Create(iBuilder->getContext(), "DumpTraceLoop", currentFn, 0);
    73     BasicBlock * DumpTraceExit = BasicBlock::Create(iBuilder->getContext(), "DumpTraceExit", currentFn, 0);
    74    
    75     Value * lastTraceIndex = iBuilder->CreateLoad(mTraceIndexPtr);
    76     Value * truncated = iBuilder->CreateICmpUGT(lastTraceIndex, traceBufSize);
    77     Value * firstDumpIndex = iBuilder->CreateSelect(truncated, iBuilder->CreateSub(lastTraceIndex, traceBufSize), ConstantInt::getNullValue(iBuilder->getInt32Ty()));
    78    
    79     iBuilder->CreateBr(DumpTraceLoop);
    80     iBuilder->SetInsertPoint(DumpTraceLoop);
    81     PHINode * loopIndex = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
    82     loopIndex->addIncoming(firstDumpIndex, DumpEntryBlock);
    83    
    84     Value * entryVarPtr = iBuilder->CreateGEP(mTraceBufferPtr, {iBuilder->getInt32(0), loopIndex, iBuilder->getInt32(0)});
    85     Value * formatString = iBuilder->CreateLoad(entryVarPtr);
    86     Value * entryValPtr = iBuilder->CreateGEP(mTraceBufferPtr, {iBuilder->getInt32(0), loopIndex, iBuilder->getInt32(1)});
    87     Value * entryVal = iBuilder->CreateLoad(entryValPtr);
    88     iBuilder->CreateCall(printF, {formatString, entryVal});
    89    
    90     Value * nextIndex = iBuilder->CreateAnd(iBuilder->CreateAdd(loopIndex, iBuilder->getInt32(1)), mTraceIndexMask);
    91     loopIndex->addIncoming(nextIndex, DumpTraceLoop);
    92     Value * atLastTraceIndex = iBuilder->CreateICmpEQ(nextIndex, lastTraceIndex);
    93     iBuilder->CreateCondBr(atLastTraceIndex, DumpTraceExit, DumpTraceLoop);
    94     iBuilder->SetInsertPoint(DumpTraceExit);
    95 }
    96 
    9730#endif
  • icGREP/icgrep-devel/icgrep/array-test.cpp

    r5457 r5464  
    2424#include <boost/filesystem.hpp>
    2525#include <boost/iostreams/device/mapped_file.hpp>
    26 #include "llvm/ADT/StringRef.h"                    // for StringRef
    27 #include "llvm/IR/CallingConv.h"                   // for ::C
    28 #include "llvm/IR/DerivedTypes.h"                  // for ArrayType
    29 #include "llvm/IR/LLVMContext.h"                   // for LLVMContext
    30 #include "llvm/IR/Value.h"                         // for Value
    31 #include "llvm/Support/Debug.h"                    // for dbgs
     26#include <llvm/ADT/StringRef.h>
     27#include <llvm/IR/CallingConv.h>
     28#include <llvm/IR/DerivedTypes.h>
     29#include <llvm/IR/LLVMContext.h>
     30#include <llvm/IR/Value.h>
     31#include <llvm/Support/Debug.h>
    3232#include <pablo/pablo_toolchain.h>
    3333#include <iostream>
    34 
     34#include <toolchain/cpudriver.h>
    3535#include <pablo/passes/ssapass.h>
    3636
     
    5050    ParenthesisMatchingKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned count);
    5151    bool isCachable() const override { return true; }
    52     bool moduleIDisSignature() const override { return true; }
     52    bool hasSignature() const override { return false; }
    5353protected:
    5454    void generatePabloMethod() override;
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r5436 r5464  
    1212#include <llvm/Support/CommandLine.h>
    1313#include <toolchain/toolchain.h>
     14#include <toolchain/cpudriver.h>
    1415#include <IR_Gen/idisa_target.h>
    1516#include <kernels/source_kernel.h>
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5457 r5464  
    2323#include <editd/editdscan_kernel.h>
    2424#include <editd/pattern_compiler.h>
     25#include <toolchain/cpudriver.h>
    2526#include <sys/stat.h>
    2627#include <fcntl.h>
     
    243244    PreprocessKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
    244245    bool isCachable() const override { return true; }
    245     bool moduleIDisSignature() const override { return true; }
     246    bool hasSignature() const override { return false; }
    246247protected:
    247248    void generatePabloMethod() override;
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5462 r5464  
    77#include "grep_engine.h"
    88#include <llvm/IR/Module.h>
    9 //#include <llvm/ExecutionEngine/MCJIT.h>
    10 #include <llvm/IR/Verifier.h>
    119#include <llvm/Support/CommandLine.h>
    1210#include <boost/filesystem.hpp>
     
    2826#include <re/re_toolchain.h>
    2927#include <toolchain/toolchain.h>
     28#include <toolchain/cpudriver.h>
     29#include <toolchain/NVPTXDriver.h>
    3030#include <iostream>
    3131#include <sstream>
     
    3535#include <sys/stat.h>
    3636#include <fcntl.h>
     37
    3738#ifdef CUDA_ENABLED
    3839#include <preprocess.cpp>
     
    252253}
    253254
    254 void GrepEngine::grepCodeGen_nvptx(const std::string & moduleName, std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16) {
    255 
    256     NVPTXDriver pxDriver(moduleName + ":icgrep");
     255void GrepEngine::grepCodeGen_nvptx(std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16) {
     256
     257    NVPTXDriver pxDriver("engine");
    257258    auto & idb = pxDriver.getBuilder();
    258259    Module * M = idb->getModule();
     
    341342}
    342343
    343 void GrepEngine::grepCodeGen(const std::string & moduleName, std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16, GrepSource grepSource, const GrepType grepType) {
    344 
    345     ParabixDriver pxDriver(moduleName + ":icgrep");
     344void GrepEngine::grepCodeGen(std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16, GrepSource grepSource, const GrepType grepType) {
     345
     346    ParabixDriver pxDriver("engine");
    346347    auto & idb = pxDriver.getBuilder();
    347348    Module * M = idb->getModule();
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5458 r5464  
    99#include <string>       // for string
    1010#include <vector>
    11 namespace llvm { class ExecutionEngine; }
    12 namespace llvm { class Module; }
     11
    1312namespace re { class CC; }
    1413namespace re { class RE; }
     
    1918    GrepEngine();
    2019
    21     void grepCodeGen(const std::string & moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16, GrepSource grepSource, GrepType grepType = GrepType::Normal);
     20    void grepCodeGen(std::vector<re::RE *> REs, bool CountOnly, bool UTF_16, GrepSource grepSource, GrepType grepType = GrepType::Normal);
    2221
    23     void grepCodeGen_nvptx(const std::string & moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16);
     22    void grepCodeGen_nvptx(std::vector<re::RE *> REs, bool CountOnly, bool UTF_16);
    2423
    2524    void doGrep(const std::string & fileName) const;
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5454 r5464  
    3131IR_Gen/idisa_target.cpp
    3232IR_Gen/idisa_target.h
    33 IR_Gen/llvm2ptx.h
    3433IR_Gen/tracegen.h
    3534kernels/alignedprint.cpp
     
    7776kernels/swizzle.cpp
    7877kernels/swizzle.h
     78kernels/until_n.cpp
     79kernels/until_n.h
    7980pablo/analysis/pabloverifier.cpp
    8081pablo/analysis/pabloverifier.hpp
     
    186187re/re_utility.cpp
    187188re/re_utility.h
     189toolchain/cpudriver.cpp
     190toolchain/cpudriver.h
     191toolchain/driver.cpp
     192toolchain/driver.h
     193toolchain/NVPTXDriver.cpp
     194toolchain/NVPTXDriver.h
    188195toolchain/object_cache.cpp
    189196toolchain/object_cache.h
     
    192199toolchain/toolchain.cpp
    193200toolchain/toolchain.h
     201toolchain/workqueue.h
    194202UCD/Blocks.h
    195203UCD/CaseFolding_txt.cpp
     
    241249lz4FrameDecoder.cpp
    242250lz4FrameDecoder.h
     251preprocess.cpp
    243252u8u16.cpp
    244253utf16_encoder.cpp
     
    248257wc.cpp
    249258CMakeLists.txt
    250 toolchain/workqueue.h
     259IR_Gen/tracegen.cpp
  • icGREP/icgrep-devel/icgrep/icgrep-devel.includes

    r5425 r5464  
    11.
    2 re
    3 cc
    4 pablo
    5 pablo/analysis
    6 pablo/optimizers
    7 UCD
    8 /usr/include/
    92../boost/include/
    10 ../cudd-2.5.1/cudd
    11 ../buddy-2.4/src
    12 pablo/passes
    13 ../llvm-build/lib
    14 kernels
    15 IDISA
    16 pablo/symbol-table
    17 include/simd-lib/idisa_cpp
    18 CMakeFiles/3.2.2/CompilerIdC
    19 include/simd-lib
    20 CMakeFiles
    21 CMakeFiles/3.2.2/CompilerIdCXX
    22 util
    23 pablo/type
    24 editd
    25 kernels/type
    26 IDISA/types
     3../libllvm/include/
    274IR_Gen
    28 IR_Gen/types
    29 pipeline
    30 toolchain
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5458 r5464  
    2020#include <fstream>
    2121#include <string>
    22 #include <boost/uuid/sha1.hpp>
    2322#include <toolchain/toolchain.h>
    2423#include <re/re_toolchain.h>
     
    113112}
    114113
    115 static std::string allREs;
    116114static re::ModeFlagSet globalFlags = 0;
    117115
     
    148146#endif
    149147        REs.push_back(re_ast);
    150         allREs += regexVector[i] + "\n";
    151148    }
    152149
     
    181178
    182179    return REs;
    183 }
    184 
    185 std::string sha1sum(const std::string & str) {
    186     char buffer[41];    // 40 hex-digits and the terminating null
    187     unsigned int digest[5];     // 160 bits in total
    188 
    189     boost::uuids::detail::sha1 sha1;
    190     sha1.process_bytes(str.c_str(), str.size());
    191     sha1.get_digest(digest);
    192     snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
    193              digest[0], digest[1], digest[2], digest[3], digest[4]);
    194     return std::string(buffer);
    195180}
    196181
     
    396381    const auto REs = readExpressions();
    397382
    398     std::string module_name = "grepcode:" + sha1sum(allREs) + ":" + std::to_string(globalFlags);
    399    
    400383    if (GrepSupport) {  // Calls icgrep again on command line and passes output to grep.
    401384        pipeIcGrepOutputToGrep(argc, argv);
     
    410393    if (allFiles.empty()) {
    411394
    412         grepEngine.grepCodeGen(module_name, REs, CountOnly, UTF_16, GrepSource::StdIn);
     395        grepEngine.grepCodeGen(REs, CountOnly, UTF_16, GrepSource::StdIn);
    413396        allFiles = { "-" };
    414397        initFileResult(allFiles);
     
    421404       
    422405        if(codegen::NVPTX){
    423             grepEngine.grepCodeGen_nvptx(module_name, REs, CountOnly, UTF_16);
     406            grepEngine.grepCodeGen_nvptx(REs, CountOnly, UTF_16);
    424407            for (unsigned i = 0; i != allFiles.size(); ++i) {
    425408                grepEngine.doGrep(allFiles[i]);
     
    428411        }
    429412        else{
    430             grepEngine.grepCodeGen(module_name, REs, CountOnly, UTF_16, GrepSource::File);
     413            grepEngine.grepCodeGen(REs, CountOnly, UTF_16, GrepSource::File);
    431414        }
    432415
  • icGREP/icgrep-devel/icgrep/kernels/deletion.h

    r5440 r5464  
    2727    DeletionKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned fw, unsigned streamCount);
    2828    bool isCachable() const override { return true; }
    29     bool moduleIDisSignature() const override { return true; }
     29    bool hasSignature() const override { return false; }
    3030protected:
    3131    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     
    4040    DeleteByPEXTkernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned fw, unsigned streamCount, bool shouldSwizzle);
    4141    bool isCachable() const override { return true; }
    42     bool moduleIDisSignature() const override { return true; }
     42    bool hasSignature() const override { return false; }
    4343protected:
    4444    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     
    5959    SwizzledBitstreamCompressByCount(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned bitStreamCount, unsigned fieldWidth = 64);
    6060    bool isCachable() const override { return true; }
    61     bool moduleIDisSignature() const override { return true; }
     61    bool hasSignature() const override { return false; }
    6262protected:
    6363    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5454 r5464  
    1212#include <pablo/builder.hpp>
    1313#include <pablo/pe_count.h>
     14
     15#include <llvm/Support/raw_ostream.h>
    1416
    1517using namespace kernel;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5460 r5464  
    193193std::string Kernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> & idb) {
    194194    assert ("KernelBuilder does not have a valid IDISA Builder" && idb.get());
    195     if (LLVM_LIKELY(moduleIDisSignature())) {
    196         return getModule()->getModuleIdentifier();
    197     } else {
     195    if (LLVM_UNLIKELY(hasSignature())) {
    198196        generateKernel(idb);
    199197        std::string signature;
     
    201199        WriteBitcodeToFile(getModule(), OS);
    202200        return signature;
     201    } else {
     202        return getModule()->getModuleIdentifier();
    203203    }
    204204}
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5456 r5464  
    99#include "interface.h"
    1010#include <boost/container/flat_map.hpp>
    11 #include <IR_Gen/idisa_builder.h>
    12 #include <toolchain/pipeline.h>
    1311#include <llvm/IR/Constants.h>
    1412
    1513namespace llvm { class Function; }
    1614namespace llvm { class IntegerType; }
     15namespace llvm { class IndirectBrInst; }
     16namespace llvm { class PHINode; }
    1717namespace llvm { class LoadInst; }
    1818namespace llvm { class Type; }
     
    6363    // based on the semantics of the kernel. 
    6464    //
    65     // If no other mechanism is available, the default generateKernelSignature() method
    66     // uses the full LLVM IR (before optimization) of the kernel instance.
     65    // If no other mechanism is available, the default makeSignature() method uses the
     66    // full LLVM IR (before optimization) of the kernel instance.
    6767    //
    6868    // A kernel Module ID is short string that is used as a name for a particular kernel
    69     // instance.  Kernel Module IDs are used to look up and retrieve cached kernel instances
    70     // and so should be highly likely to uniquely identify a kernel instance.
     69    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel
     70    // instances and so should be highly likely to uniquely identify a kernel instance.
    7171    //
    7272    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
    73     // guaranteeing uniqueness.  In this case, the moduleIDisUnique() method
    74     // should return true.
     73    // guaranteeing uniqueness.  In this case, hasSignature() should return false.
    7574    //
    7675       
     
    8079
    8180    // Can the module ID itself serve as the unique signature?
    82     virtual bool moduleIDisSignature() const { return false; }
     81    virtual bool hasSignature() const { return true; }
    8382
    8483    // Create a module stub for the kernel, populated only with its Module ID.     
  • icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.h

    r5436 r5464  
    1616    LineBreakKernelBuilder(const std::unique_ptr<KernelBuilder> & b, unsigned basisBitsCount);
    1717    bool isCachable() const override { return true; }
    18     bool moduleIDisSignature() const override { return true; }
     18    bool hasSignature() const override { return false; }
    1919protected:
    2020    void generatePabloMethod() override;
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h

    r5440 r5464  
    1616    P2SKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    1717    bool isCachable() const override { return true; }
    18     bool moduleIDisSignature() const override { return true; }
     18    bool hasSignature() const override { return false; }
    1919private:
    2020    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     
    2525    P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    2626    bool isCachable() const override { return true; }
    27     bool moduleIDisSignature() const override { return true; }
     27    bool hasSignature() const override { return false; }
    2828private:
    2929    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     
    3434    P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    3535    bool isCachable() const override { return true; }
    36     bool moduleIDisSignature() const override { return true; }
     36    bool hasSignature() const override { return false; }
    3737private:
    3838    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     
    4343    P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    4444    bool isCachable() const override { return true; }
    45     bool moduleIDisSignature() const override { return true; }
     45    bool hasSignature() const override { return false; }
    4646private:
    4747    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r5440 r5464  
    2323    expand3_4Kernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    2424    bool isCachable() const override { return true; }
    25     bool moduleIDisSignature() const override { return true; }
     25    bool hasSignature() const override { return false; }
    2626private:
    2727    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) override;
     
    3232    radix64Kernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    3333    bool isCachable() const override { return true; }
    34     bool moduleIDisSignature() const override { return true; }
     34    bool hasSignature() const override { return false; }
    3535private:
    3636    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
     
    4343    base64Kernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    4444    bool isCachable() const override { return true; }
    45     bool moduleIDisSignature() const override { return true; }
     45    bool hasSignature() const override { return false; }
    4646private:
    4747    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r5440 r5464  
    1616    S2PKernel(const std::unique_ptr<kernel::KernelBuilder> & b, bool aligned = true);
    1717    bool isCachable() const override { return true; }
    18     bool moduleIDisSignature() const override { return true; }
     18    bool hasSignature() const override { return false; }
    1919protected:
    2020    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.h

    r5440 r5464  
    1818    ScanMatchKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const GrepType grepType, const unsigned codeUnitWidth);
    1919    bool isCachable() const override { return true; }
    20     bool moduleIDisSignature() const override { return true; }
     20    bool hasSignature() const override { return false; }
    2121protected:
    2222    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.cpp

    r5454 r5464  
    160160void ReadSourceKernel::generateInitializeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    161161    ConstantInt * const bufferSize = iBuilder->getSize(64 * getpagesize());
    162     Value * const buffer = iBuilder->CreateAlignedMalloc(bufferSize, iBuilder->getCacheAlignment());
     162    Value * const buffer = iBuilder->CreateCacheAlignedMalloc(bufferSize);
    163163    iBuilder->setScalarField("buffer", buffer);
    164164    iBuilder->setScalarField("capacity", bufferSize);
     
    179179    assert(iBuilder->getKernel() == this);
    180180
    181     // The ReadSourceKernel begins by checking whether it needs to read another page of data
     181    // Check whether we need to read another page of data
    182182    ConstantInt * const segmentSize = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
    183183    Value * bufferedSize = iBuilder->getBufferedSize("sourceBuffer");
     
    203203    inputStream = iBuilder->CreatePointerCast(inputStream, codeUnitPtrTy);
    204204    Value * const originalPtr = iBuilder->CreateGEP(inputStream, produced);
     205
    205206    Value * const buffer = iBuilder->CreatePointerCast(iBuilder->getScalarField("buffer"), codeUnitPtrTy);
    206207    Value * const capacity = iBuilder->getScalarField("capacity");
    207     Value * const canAppend = iBuilder->CreateICmpULT(iBuilder->CreateGEP(originalPtr, pageSize), iBuilder->CreateGEP(buffer, capacity));
     208
     209    Value * L = iBuilder->CreateGEP(originalPtr, pageSize);
     210
     211//    iBuilder->CallPrintInt("L", L);
     212
     213    Value * B = iBuilder->CreateGEP(buffer, capacity);
     214
     215//    iBuilder->CallPrintInt("B", B);
     216
     217    Value * const canAppend = iBuilder->CreateICmpULT(L, B);
    208218    iBuilder->CreateLikelyCondBr(canAppend, readData, waitOnConsumers);
    209219
     
    211221    iBuilder->SetInsertPoint(waitOnConsumers);
    212222    iBuilder->CreateConsumerWait();
     223
    213224    // Then determine how much data has been consumed and how much needs to be copied back, noting
    214225    // that our "unproduced" data must be block aligned.
    215     const auto alignment = iBuilder->getBitBlockWidth() / 8;
    216     Constant * const alignmentMask = ConstantExpr::getNeg(iBuilder->getSize(alignment));
     226    const auto blockAlignment = iBuilder->getBitBlockWidth() / 8;
     227    Constant * const alignmentMask = ConstantExpr::getNot(iBuilder->getSize(blockAlignment - 1));
    217228    Value * const consumed = iBuilder->CreateAnd(iBuilder->getConsumedItemCount("sourceBuffer"), alignmentMask);
    218229    Value * const remaining = iBuilder->CreateSub(bufferedSize, consumed);
    219230    Value * const unconsumedPtr = iBuilder->CreateGEP(inputStream, consumed);
    220231    Value * const consumedMajority = iBuilder->CreateICmpULT(iBuilder->CreateGEP(buffer, remaining), unconsumedPtr);
     232
     233//    iBuilder->CallPrintInt("consumedMajority", consumedMajority);
     234
    221235    BasicBlock * const copyBack = iBuilder->CreateBasicBlock("CopyBack");
    222236    BasicBlock * const expandAndCopyBack = iBuilder->CreateBasicBlock("ExpandAndCopyBack");
     
    227241    iBuilder->SetInsertPoint(copyBack);
    228242    // If so, just copy the data ...
    229     iBuilder->CreateMemCpy(buffer, unconsumedPtr, remaining, alignment);
     243    iBuilder->CreateMemCpy(buffer, unconsumedPtr, remaining, blockAlignment);
    230244    iBuilder->CreateBr(calculateLogicalAddress);
    231245    // Otherwise, allocate a buffer with twice the capacity and copy the unconsumed data back into it
    232246    iBuilder->SetInsertPoint(expandAndCopyBack);
    233247    Value * const expandedCapacity = iBuilder->CreateShl(capacity, 1);
    234     Value * const expandedBuffer = iBuilder->CreateAlignedMalloc(expandedCapacity, iBuilder->getCacheAlignment());
     248    Value * const expandedBuffer = iBuilder->CreateCacheAlignedMalloc(expandedCapacity);
    235249    Value * const expandedPtr = iBuilder->CreatePointerCast(expandedBuffer, codeUnitPtrTy);
    236     iBuilder->CreateMemCpy(expandedPtr, unconsumedPtr, remaining, alignment);
    237     iBuilder->CreateAlignedFree(buffer);
     250    iBuilder->CreateMemCpy(expandedPtr, unconsumedPtr, remaining, blockAlignment);
     251    iBuilder->CreateFree(buffer);
    238252    iBuilder->setScalarField("buffer", expandedBuffer);
    239253    iBuilder->setScalarField("capacity", expandedCapacity);
     
    285299
    286300void ReadSourceKernel::generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    287     iBuilder->CreateAlignedFree(iBuilder->getScalarField("buffer"));
     301    iBuilder->CreateFree(iBuilder->getScalarField("buffer"));
    288302}
    289303
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.h

    r5440 r5464  
    1515    MMapSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
    1616    bool isCachable() const override { return true; }
    17     bool moduleIDisSignature() const override { return true; }
     17    bool hasSignature() const override { return false; }
    1818protected:
    1919    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     
    3131    ReadSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
    3232    bool isCachable() const override { return true; }
    33     bool moduleIDisSignature() const override { return true; }
     33    bool hasSignature() const override { return false; }
    3434protected:
    3535    void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
     
    4444public:
    4545    MemorySourceKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, llvm::Type * type, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
    46     bool moduleIDisSignature() const override { return true; }
     46    bool hasSignature() const override { return false; }
    4747protected:
    4848    void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5457 r5464  
    319319    Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), iBuilder->getSizeTy(), false);
    320320    Constant * const size = ConstantExpr::getMul(iBuilder->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
    321     Value * const ptr = iBuilder->CreateAlignedMalloc(size, iBuilder->getCacheAlignment());
     321    const auto alignment = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
     322    Value * const ptr = iBuilder->CreateAlignedMalloc(size, alignment);
    322323    iBuilder->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
    323324    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     
    385386
    386387        Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
    387         Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), iBuilder->getCacheAlignment()), elementType->getPointerTo());
     388        const auto memAlign = std::max(iBuilder->getCacheAlignment(), iBuilder->getBitBlockWidth() / 8);
     389        Value * newStreamSet = iBuilder->CreatePointerCast(iBuilder->CreateAlignedMalloc(iBuilder->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
    388390        Value * const diffCapacity = iBuilder->CreateMul(iBuilder->CreateSub(newCapacity, capacity), vectorWidth);
    389391
     
    401403        }
    402404
    403         iBuilder->CreateAlignedFree(streamSet);
     405        iBuilder->CreateFree(streamSet);
    404406
    405407        iBuilder->CreateRet(newStreamSet);
     
    454456
    455457void ExpandableBuffer::releaseBuffer(IDISA::IDISA_Builder * const iBuilder, Value * self) const {
    456     iBuilder->CreateAlignedFree(getBaseAddress(iBuilder, self));
     458    iBuilder->CreateFree(getBaseAddress(iBuilder, self));
    457459}
    458460
  • icGREP/icgrep-devel/icgrep/lz4d.cpp

    r5446 r5464  
    2929
    3030#include <kernels/kernel_builder.h>
     31#include <toolchain/cpudriver.h>
    3132
    3233#include <string>
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5440 r5464  
    222222        iBuilder->CreateMemCpy(newArray, array, capacitySize, BlockWidth);
    223223        iBuilder->CreateMemZero(iBuilder->CreateGEP(newArray, capacitySize), capacitySize, BlockWidth);
    224         iBuilder->CreateAlignedFree(array);
     224        iBuilder->CreateFree(array);
    225225        newArray = iBuilder->CreatePointerCast(newArray, array->getType());
    226226        iBuilder->CreateStore(newArray, arrayPtr);
     
    235235        iBuilder->CreateMemCpy(newSummary, summary, summarySize, BlockWidth);
    236236        iBuilder->CreateMemZero(iBuilder->CreateGEP(newSummary, summarySize), iBuilder->getSize(2 * BlockWidth), BlockWidth);
    237         iBuilder->CreateAlignedFree(summary);
     237        iBuilder->CreateFree(summary);
    238238
    239239        Value * ptr1 = iBuilder->CreateGEP(newSummary, summarySize);
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/distributivepass.cpp

    r5267 r5464  
    11#include "distributivepass.h"
    22
     3#include <pablo/pablo_kernel.h>
    34#include <pablo/codegenstate.h>
    4 #ifndef NDEBUG
    5 #include <pablo/analysis/pabloverifier.hpp>
    6 #endif
    7 #include <pablo/optimizers/pablo_simplifier.hpp>
    8 #include <pablo/passes/flattenassociativedfg.h>
     5#include <pablo/branch.h>
     6#include <pablo/pe_string.h>
     7#include <pablo/pe_integer.h>
     8#include <pablo/pe_zeroes.h>
     9#include <pablo/pe_ones.h>
     10#include <pablo/boolean.h>
    911#include <boost/container/flat_set.hpp>
    1012#include <boost/container/flat_map.hpp>
    1113#include <boost/graph/adjacency_list.hpp>
    12 
    13 #include <pablo/printer_pablos.h>
    14 #include <iostream>
     14#include <boost/graph/topological_sort.hpp>
     15#include <boost/function_output_iterator.hpp>
     16
     17#include <boost/graph/strong_components.hpp>
     18#include <llvm/Support/raw_ostream.h>
    1519
    1620using namespace boost;
    1721using namespace boost::container;
    18 
    19 namespace pablo {
    20 
    21 using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, PabloAST *>;
     22using namespace llvm;
     23
     24using TypeId = pablo::PabloAST::ClassTypeId;
     25using VertexData = std::pair<pablo::PabloAST *, TypeId>;
     26
     27using Graph = adjacency_list<hash_setS, vecS, bidirectionalS, VertexData, pablo::PabloAST *>;
    2228using Vertex = Graph::vertex_descriptor;
    23 using Map = flat_map<PabloAST *, Vertex>;
     29
     30using DistributionGraph = adjacency_list<hash_setS, vecS, bidirectionalS, Vertex>;
     31using DistributionVertex = DistributionGraph::vertex_descriptor;
     32
    2433using VertexSet = std::vector<Vertex>;
    2534using Biclique = std::pair<VertexSet, VertexSet>;
     
    2736using DistributionSet = std::tuple<VertexSet, VertexSet, VertexSet>;
    2837using DistributionSets = std::vector<DistributionSet>;
    29 using TypeId = PabloAST::ClassTypeId;
     38
     39using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>;
     40
     41namespace pablo {
     42
    3043
    3144/** ------------------------------------------------------------------------------------------------------------- *
    32  * @brief intersects
     45 * @brief printGraph
    3346 ** ------------------------------------------------------------------------------------------------------------- */
    34 template <class Type>
    35 inline bool intersects(Type & A, Type & B) {
    36     auto first1 = A.begin(), last1 = A.end();
    37     auto first2 = B.begin(), last2 = B.end();
    38     while (first1 != last1 && first2 != last2) {
    39         if (*first1 < *first2) {
    40             ++first1;
    41         } else if (*first2 < *first1) {
    42             ++first2;
     47static void printGraph(const Graph & G, const std::string & name, llvm::raw_ostream & out) {
     48
     49    std::vector<unsigned> c(num_vertices(G));
     50    strong_components(G, make_iterator_property_map(c.begin(), get(vertex_index, G), c[0]));
     51
     52    out << "digraph " << name << " {\n";
     53    for (auto u : make_iterator_range(vertices(G))) {
     54        if (in_degree(u, G) == 0 && out_degree(u, G) == 0) {
     55            continue;
     56        }
     57        out << "v" << u << " [label=\"" << u << ": ";
     58        TypeId typeId;
     59        PabloAST * expr;
     60        std::tie(expr, typeId) = G[u];
     61        bool temporary = false;
     62        bool error = false;
     63        if (expr == nullptr || (typeId != expr->getClassTypeId() && typeId != TypeId::Var)) {
     64            temporary = true;
     65            switch (typeId) {
     66                case TypeId::And:
     67                    out << "And";
     68                    break;
     69                case TypeId::Or:
     70                    out << "Or";
     71                    break;
     72                case TypeId::Xor:
     73                    out << "Xor";
     74                    break;
     75                case TypeId::Not:
     76                    out << "Not";
     77                    break;
     78                default:
     79                    out << "???";
     80                    error = true;
     81                    break;
     82            }
     83            if (expr) {
     84                out << " ("; expr->print(out); out << ")";
     85            }
    4386        } else {
    44             return true;
    45         }
    46     }
    47     return false;
     87            expr->print(out);
     88        }
     89        out << "\"";
     90        if (typeId == TypeId::Var) {
     91            out << " style=dashed";
     92        }
     93        if (error) {
     94            out << " color=red";
     95        } else if (temporary) {
     96            out << " color=blue";
     97        }
     98        out << "];\n";
     99    }
     100    for (auto e : make_iterator_range(edges(G))) {
     101        const auto s = source(e, G);
     102        const auto t = target(e, G);
     103        out << "v" << s << " -> v" << t;
     104        bool cyclic = (c[s] == c[t]);
     105        if (G[e] || cyclic) {
     106            out << " [";
     107            PabloAST * const expr = G[e];
     108            if (expr) {
     109                out << "label=\"";
     110                expr->print(out);
     111                out << "\" ";
     112             }
     113             if (cyclic) {
     114                out << "color=red ";
     115             }
     116             out << "]";
     117        }
     118        out << ";\n";
     119    }
     120
     121    if (num_vertices(G) > 0) {
     122
     123        out << "{ rank=same;";
     124        for (auto u : make_iterator_range(vertices(G))) {
     125            if (in_degree(u, G) == 0 && out_degree(u, G) != 0) {
     126                out << " v" << u;
     127            }
     128        }
     129        out << "}\n";
     130
     131        out << "{ rank=same;";
     132        for (auto u : make_iterator_range(vertices(G))) {
     133            if (out_degree(u, G) == 0 && in_degree(u, G) != 0) {
     134                out << " v" << u;
     135            }
     136        }
     137        out << "}\n";
     138
     139    }
     140
     141    out << "}\n\n";
     142    out.flush();
    48143}
    49144
    50 /** ------------------------------------------------------------------------------------------------------------- *
    51  * @brief independentCliqueSets
    52  ** ------------------------------------------------------------------------------------------------------------- */
    53 static BicliqueSet && independentCliqueSets(BicliqueSet && bicliques, const bool uppsetSet) {
    54     using IndependentSetGraph = adjacency_list<hash_setS, vecS, undirectedS, unsigned>;
    55 
    56     const auto l = bicliques.size();
    57     IndependentSetGraph I(l);
    58 
    59 
    60     // Initialize our weights and determine the constraints
    61     if (uppsetSet) {
    62         for (auto i = bicliques.begin(); i != bicliques.end(); ++i) {
    63             I[std::distance(bicliques.begin(), i)] = std::pow(std::get<0>(*i).size(), 2);
    64             for (auto j = i; ++j != bicliques.end(); ) {
    65                 if (intersects(i->first, j->first)) {
    66                     add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I);
    67                 }
    68             }
    69         }
    70     } else {
    71         for (auto i = bicliques.begin(); i != bicliques.end(); ++i) {
    72             I[std::distance(bicliques.begin(), i)] = std::pow(std::get<1>(*i).size(), 2);
    73             for (auto j = i; ++j != bicliques.end(); ) {
    74                 if (intersects(i->first, j->first) && intersects(i->second, j->second)) {
    75                     add_edge(std::distance(bicliques.begin(), i), std::distance(bicliques.begin(), j), I);
    76                 }
    77             }
    78         }
    79     }
    80 
    81 
    82     // Use the greedy algorithm to choose our independent set
    83     VertexSet selected;
    84     for (;;) {
    85         unsigned w = 0;
    86         Vertex u = 0;
     145struct PassContainer {
     146
     147    /** ------------------------------------------------------------------------------------------------------------- *
     148     * @brief run
     149     *
     150     * Based on the knowledge that:
     151     *
     152     *  (ASSOCIATIVITY)    A ∧ (B ∧ C) ⇔ (A ∧ B) ∧ C ⇔ A ∧ B ∧ C   and   A √ (B √ C) ⇔ (A √ B) √ C ⇔ A √ B √ C
     153     *
     154     *  (IDENTITY)         A √ 0 ⇔ A   and   A ∧ 1 = A
     155     *
     156     *  (ANNULMENT)        A ∧ 0 ⇔ 0   and   A √ 1 = 1
     157     *
     158     *  (IDEMPOTENT)       A √ (A √ B) ⇔ A √ B   and   A ∧ (A ∧ B) ⇔ A ∧ B
     159     *
     160     *  (ABSORBTION)       A √ (A ∧ B) ⇔ A ∧ (A √ B) ⇔ A
     161     *
     162     *  (COMPLEMENT)       A √ ¬A ⇔ 1   and   A ∧ ¬A = 0
     163     *
     164     *  (DISTRIBUTIVITY)   (A ∧ B) √ (A ∧ C) ⇔ A ∧ (B √ C)   and   (A √ B) ∧ (A √ C) ⇔ A √ (B ∧ C)
     165     *
     166     * Try to eliminate some of the unnecessary operations from the current Variadic expressions
     167     ** ------------------------------------------------------------------------------------------------------------- */
     168    void run(PabloBlock * const block) {
     169        Statement * stmt = block->front();
     170        // Statement * first = stmt;
     171        while (stmt) {
     172            Statement * next = stmt->getNextNode();
     173            if (isa<Branch>(stmt)) {
     174                // addUsageInformation();
     175                if (simplifyGraph()) {
     176                    // rewriteAST(first, stmt);
     177
     178                    // printGraph(G, "G", errs());
     179                }
     180
     181
     182
     183                G.clear();
     184                M.clear();
     185                run(cast<Branch>(stmt)->getBody());
     186                G.clear();
     187                M.clear();
     188            } else {
     189                addStatement(stmt);
     190            }
     191            stmt = next;
     192        }
     193    }
     194
     195protected:
     196
     197//    /** ------------------------------------------------------------------------------------------------------------- *
     198//     * @brief addUsageInformation
     199//     *
     200//     * Populate G with the user information of each statement so that we can determine whether it'll be cost effective
     201//     * to distribute an operation.
     202//     ** ------------------------------------------------------------------------------------------------------------- */
     203//    void addUsageInformation() {
     204//        for (const Vertex u : make_iterator_range(vertices(G))) {
     205//            PabloAST * expr; TypeId typeId;
     206//            std::tie(expr, typeId) = G[u];
     207//            if (LLVM_LIKELY(typeId != TypeId::Var)) {
     208//                for (PabloAST * user : expr->users()) {
     209//                    add_edge(u, addExpression(user), user, G);
     210//                }
     211//            }
     212//        }
     213//    }
     214
     215    /** ------------------------------------------------------------------------------------------------------------- *
     216     * @brief applyAssociativeIdentityAnnulmentLaws
     217     ** ------------------------------------------------------------------------------------------------------------- */
     218    bool applyAssociativeIdentityAnnulmentLaws() {
     219
     220        bool modified = false;
     221
     222        std::function<void(const Vertex)> apply = [&](const Vertex u) {
     223            PabloAST * expr; TypeId typeId;
     224            std::tie(expr, typeId) = G[u];
     225
     226
     227
     228            if (LLVM_UNLIKELY(typeId == TypeId::Zeroes || typeId == TypeId::Ones)) {
     229repeat:         for (auto e : make_iterator_range(out_edges(u, G))) {
     230                    const auto v = target(e, G);
     231                    const auto targetTypeId = getType(v);
     232                    if (LLVM_UNLIKELY(isAssociative(targetTypeId))) {
     233                        if (isIdentityRelation(typeId, targetTypeId)) {
     234                            remove_edge(e, G);
     235                        } else {
     236                            setType(v, typeId == TypeId::And ? TypeId::Zeroes : TypeId::Ones);
     237                            clear_in_edges(v, G);
     238                            apply(v);
     239                        }
     240                        modified = true;
     241                        goto repeat;
     242                    }
     243                }
     244            } else if (isAssociative(typeId)) {
     245
     246                bool contractable = true;
     247                // an associative operation with only one element is always equivalent to the element
     248                if (LLVM_LIKELY(in_degree(u, G) != 1)) {
     249                    for (auto e : make_iterator_range(out_edges(u, G))) {
     250                        if (LLVM_LIKELY(typeId != getType(target(e, G)))) {
     251                            contractable = false;
     252                            break;
     253                        }
     254                    }
     255                }
     256                if (LLVM_UNLIKELY(contractable)) {
     257                    for (auto ei : make_iterator_range(in_edges(u, G))) {
     258                        for (auto ej : make_iterator_range(out_edges(u, G))) {
     259                            add_edge(source(ei, G), target(ej, G), expr, G);
     260                        }
     261                    }
     262                    clear_vertex(u, G);
     263                    setType(u, TypeId::Var);
     264                    modified = true;
     265                }
     266            }
     267        };
     268
     269        // note: calls "apply" on each vertex in reverse topological order
     270        topological_sort(G, boost::make_function_output_iterator(apply));
     271
     272        return modified;
     273    }
     274
     275    /** ------------------------------------------------------------------------------------------------------------- *
     276     * @brief applyAbsorbtionComplementIdempotentLaws
     277     ** ------------------------------------------------------------------------------------------------------------- */
     278    bool applyAbsorbtionComplementIdempotentLaws() {
     279        using in_edge_iterator = graph_traits<Graph>::in_edge_iterator;
     280        bool modified = false;
     281        for (const Vertex u : make_iterator_range(vertices(G))) {
     282            const TypeId typeId = getType(u);
     283            if (isDistributive(typeId)) {
     284restart_loop:   in_edge_iterator ei_begin, ei_end;
     285                std::tie(ei_begin, ei_end) = in_edges(u, G);
     286                for (auto ei = ei_begin; ei != ei_end; ++ei) {
     287                    const auto v = source(*ei, G);
     288                    const auto innerTypeId = getType(v);
     289                    if (isDistributive(innerTypeId) || innerTypeId == TypeId::Not) {
     290                        in_edge_iterator ek_begin, ek_end;
     291                        std::tie(ek_begin, ek_end) = in_edges(v, G);
     292                        for (auto ej = ei_begin; ej != ei_end; ++ej) {
     293                            for (auto ek = ek_begin; ek != ek_end; ++ek) {
     294                                if (LLVM_UNLIKELY(source(*ej, G) == source(*ek, G))) {
     295                                    if (LLVM_UNLIKELY(innerTypeId == TypeId::Not)) {
     296                                        // complement
     297                                        setType(u, typeId == TypeId::And ? TypeId::Zeroes : TypeId::Ones);
     298                                        clear_in_edges(u, G);
     299                                        goto abort_loop;
     300                                    } else {
     301                                        if (LLVM_LIKELY(innerTypeId != typeId)) {
     302                                            // idempotent
     303                                            remove_edge(*ei, G);
     304                                        } else {
     305                                            // absorbtion
     306                                            remove_edge(*ej, G);
     307                                        }
     308                                        modified = true;
     309                                        // this seldom occurs so if it does, just restart the process rather than
     310                                        // trying to keep the iterators valid.
     311                                        goto restart_loop;
     312                                    }
     313                                }
     314                            }
     315                        }
     316                    }
     317                }
     318            }
     319            abort_loop:;
     320        }
     321        return modified;
     322    }
     323
     324
     325    /** ------------------------------------------------------------------------------------------------------------- *
     326     * @brief contractGraph
     327     *
     328     * This function scans through a scope block and computes a DAG G in which any sequences of AND, OR or XOR functions
     329     * are "flattened" (i.e., allowed to have any number of inputs.)
     330     ** ------------------------------------------------------------------------------------------------------------- */
     331    bool contractGraph() {
     332        bool modified = false;
     333        bool alreadyGoneOnce = false;
     334        for (;;) {
     335            if (applyAssociativeIdentityAnnulmentLaws()) {
     336                modified = true;
     337            } else if (alreadyGoneOnce) {
     338                break;
     339            }
     340            if (applyAbsorbtionComplementIdempotentLaws()) {
     341                modified = true;
     342            } else { // if (alreadyGoneOnce) {
     343                break;
     344            }
     345            alreadyGoneOnce = true;
     346        }
     347        return modified;
     348    }
     349
     350    /** ------------------------------------------------------------------------------------------------------------- *
     351     * @brief addVertex
     352     ** ------------------------------------------------------------------------------------------------------------- */
     353    DistributionVertex addVertex(const Vertex u) {
     354        const auto f = D.find(u);
     355        if (LLVM_LIKELY(f != D.end())) {
     356            return f->second;
     357        }
     358        const auto v = add_vertex(u, H);
     359        D.emplace(u, v);
     360        return v;
     361    }
     362
     363    /** ------------------------------------------------------------------------------------------------------------- *
     364     * @brief generateDistributionGraph
     365     *
     366     * Let (u) ∈ V(G) be a conjunction ∧ or disjunction √ and (v) be a ∧ or √ and the opposite type of (u). If (u,v) ∈
     367     * E(G) and all outgoing edges of (v) lead to a vertex of the same type, add (u), (v) and any vertex (w) in which
     368     * (w,v) ∈ E(G) to the distribution graph H as well as the edges indicating their relationships within G.
     369     *
     370     *                  (?) (?) (?) <-- w1, w2, ...
     371     *                     \ | /
     372     *                      (v)   <-- v
     373     *                     /   \
     374     *            u --> (∧)     (∧)
     375     *
     376     ** ------------------------------------------------------------------------------------------------------------- */
     377    void generateDistributionGraph() {
     378
     379        assert (D.empty());
     380
     381        flat_set<Vertex> distributable;
     382        flat_set<Vertex> observed;
     383
     384        for (const Vertex u : make_iterator_range(vertices(G))) {
     385            const TypeId outerTypeId = getType(u);
     386            if (isDistributive(outerTypeId)) {
     387                const TypeId innerTypeId = oppositeTypeId(outerTypeId);
     388                for (auto e : make_iterator_range(in_edges(u, G))) {
     389                    const Vertex v = source(e, G);
     390                    if (LLVM_UNLIKELY(std::get<1>(G[v]) == innerTypeId)) {
     391                        bool beneficial = true;
     392                        for (const auto e : make_iterator_range(out_edges(v, G))) {
     393                            if (std::get<1>(G[target(e, G)]) != outerTypeId) {
     394                                beneficial = false;
     395                                break;
     396                            }
     397                        }
     398                        if (beneficial) {
     399                            distributable.insert(v);
     400                        }
     401                    }
     402                }
     403                if (LLVM_LIKELY(distributable.size() > 1)) {
     404                    for (const Vertex v : distributable) {
     405                        for (auto e : make_iterator_range(in_edges(v, G))) {
     406                            observed.insert(source(e, G));
     407                        }
     408                    }
     409                    for (const Vertex w : observed) {
     410                        for (auto e : make_iterator_range(out_edges(w, G))) {
     411                            const Vertex v = target(e, G);
     412                            if (distributable.count(v)) {
     413                                const Vertex y = addVertex(v);
     414                                boost::add_edge(y, addVertex(u), H);
     415                                boost::add_edge(addVertex(w), y, H);
     416                            }
     417                        }
     418                    }
     419                    observed.clear();
     420                }
     421                distributable.clear();
     422            }
     423        }
     424
     425        D.clear();
     426    }
     427
     428
     429    /** ------------------------------------------------------------------------------------------------------------- *
     430     * @brief redistributeAST
     431     *
     432     * Apply the distribution law to reduce computations whenever possible.
     433     ** ------------------------------------------------------------------------------------------------------------- */
     434    bool simplifyGraph() {
     435
     436        VertexSet sinks;
     437
     438        bool modified = false;
     439
     440        for (;;) {
     441
     442            assert (num_vertices(H) == 0 && num_edges(H) == 0);
     443
     444            if (contractGraph()) {
     445                modified = true;
     446            }
     447
     448            generateDistributionGraph();
     449
     450            // If we found no potential opportunities then we cannot apply the distribution law to any part of G.
     451            if (num_vertices(H) == 0) {
     452                break;
     453            }
     454
     455            for (const Vertex u : make_iterator_range(vertices(H))) {
     456                if (out_degree(u, H) == 0 && in_degree(u, H) != 0) {
     457                    sinks.push_back(u);
     458                }
     459            }
     460            std::sort(sinks.begin(), sinks.end());
     461
     462            bool done = true;
     463
     464            const auto lowerSet = independentCliqueSets<1>(removeUnhelpfulBicliques(enumerateBicliques(sinks)), 1);
     465
     466            for (auto & lower : lowerSet) {
     467                const auto upperSet = independentCliqueSets<0>(enumerateBicliques(std::get<1>(lower)), 2);
     468                for (const auto & upper : upperSet) {
     469
     470                    const auto & sources = std::get<1>(upper);
     471                    const auto & intermediary = std::get<0>(upper);
     472                    const auto & sinks = std::get<0>(lower);
     473
     474                    const auto outerTypeId = getType(H[sinks.front()]);
     475                    const auto innerTypeId = oppositeTypeId(outerTypeId);
     476
     477                    // Update G to match the desired change
     478                    const auto x = makeVertex(outerTypeId);
     479                    const auto y = makeVertex(innerTypeId);
     480
     481                    for (const auto i : intermediary) {
     482                        const auto u = H[i];
     483                        assert (getType(u) == innerTypeId);
     484                        for (const Vertex t : sinks) {
     485                            assert (getType(H[t]) == outerTypeId);
     486                            remove_edge(u, H[t], G);
     487                        }
     488                        add_edge(u, x, nullptr, G);
     489                    }
     490
     491                    for (const Vertex s : sources) {
     492                        const auto u = H[s];
     493                        for (const Vertex i : intermediary) {
     494                            remove_edge(u, H[i], G);
     495                        }
     496                        add_edge(u, y, nullptr, G);
     497                    }
     498                    add_edge(x, y, nullptr, G);
     499
     500                    for (const Vertex t : sinks) {
     501                        const auto v = H[t];
     502                        add_edge(y, v, std::get<0>(G[v]), G);
     503                    }
     504
     505                    done = false;
     506                }
     507            }
     508
     509            H.clear();
     510
     511            if (done) {
     512                break;
     513            } else {
     514                sinks.clear();
     515                modified = true;
     516            }
     517        }
     518
     519        assert (num_vertices(H) == 0 && num_edges(H) == 0);
     520
     521        return modified;
     522    }
     523
     524
     525    /** ------------------------------------------------------------------------------------------------------------- *
     526     * @brief enumerateBicliques
     527     *
     528     * Adaptation of the MICA algorithm as described in "Consensus algorithms for the generation of all maximal
     529     * bicliques" by Alexe et. al. (2003). Note: this implementation considers all verticies in set A to be in
     530     * bipartition A and their adjacencies to be in B.
     531      ** ------------------------------------------------------------------------------------------------------------- */
     532
     533    BicliqueSet enumerateBicliques(const VertexSet & A) {
     534        using IntersectionSets = std::set<VertexSet>;
     535
     536        IntersectionSets B1;
     537        for (auto u : A) {
     538            if (in_degree(u, H) > 0) {
     539                VertexSet incomingAdjacencies;
     540                incomingAdjacencies.reserve(in_degree(u, H));
     541                for (auto e : make_iterator_range(in_edges(u, H))) {
     542                    incomingAdjacencies.push_back(source(e, H));
     543                }
     544                std::sort(incomingAdjacencies.begin(), incomingAdjacencies.end());
     545                B1.insert(std::move(incomingAdjacencies));
     546            }
     547        }
     548
     549        IntersectionSets B(B1);
     550
     551        IntersectionSets Bi;
     552
     553        VertexSet clique;
     554        for (auto i = B1.begin(); i != B1.end(); ++i) {
     555            for (auto j = i; ++j != B1.end(); ) {
     556                std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(clique));
     557                if (clique.size() > 0) {
     558                    if (B.count(clique) == 0) {
     559                        Bi.insert(clique);
     560                    }
     561                    clique.clear();
     562                }
     563            }
     564        }
     565
     566        for (;;) {
     567            if (Bi.empty()) {
     568                break;
     569            }
     570            B.insert(Bi.begin(), Bi.end());
     571            IntersectionSets Bk;
     572            for (auto i = B1.begin(); i != B1.end(); ++i) {
     573                for (auto j = Bi.begin(); j != Bi.end(); ++j) {
     574                    std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(clique));
     575                    if (clique.size() > 0) {
     576                        if (B.count(clique) == 0) {
     577                            Bk.insert(clique);
     578                        }
     579                        clique.clear();
     580                    }
     581                }
     582            }
     583            Bi.swap(Bk);
     584        }
     585
     586        BicliqueSet cliques;
     587        cliques.reserve(B.size());
     588        for (auto Bi = B.begin(); Bi != B.end(); ++Bi) {
     589            VertexSet Ai(A);
     590            for (const Vertex u : *Bi) {
     591                VertexSet Aj;
     592                Aj.reserve(out_degree(u, H));
     593                for (auto e : make_iterator_range(out_edges(u, H))) {
     594                    Aj.push_back(target(e, H));
     595                }
     596                std::sort(Aj.begin(), Aj.end());
     597                VertexSet Ak;
     598                Ak.reserve(std::min(Ai.size(), Aj.size()));
     599                std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak));
     600                Ai.swap(Ak);
     601            }
     602            assert (Ai.size() > 0); // cannot happen if this algorithm is working correctly
     603            cliques.emplace_back(std::move(Ai), std::move(*Bi));
     604        }
     605        return std::move(cliques);
     606    }
     607
     608
     609    /** ------------------------------------------------------------------------------------------------------------- *
     610     * @brief independentCliqueSets
     611     ** ------------------------------------------------------------------------------------------------------------- */
     612    template <unsigned side>
     613    BicliqueSet && independentCliqueSets(BicliqueSet && cliques, const unsigned minimum) {
     614
     615
     616        const auto l = cliques.size();
     617        IndependentSetGraph I(l);
     618
     619        // Initialize our weights
    87620        for (unsigned i = 0; i != l; ++i) {
    88             if (I[i] > w) {
    89                 w = I[i];
    90                 u = i;
    91             }
    92         }
    93         if (w < (uppsetSet ? 2 : 1)) break;
    94         selected.push_back(u);
    95         I[u] = 0;
    96         for (auto v : make_iterator_range(adjacent_vertices(u, I))) {
    97             I[v] = 0;
    98         }
    99     }
    100 
    101     // Sort the selected list and then remove the unselected cliques
    102     std::sort(selected.begin(), selected.end(), std::greater<Vertex>());
    103     auto end = bicliques.end();
    104     for (const unsigned offset : selected) {
    105         end = bicliques.erase(bicliques.begin() + offset + 1, end) - 1;
    106     }
    107     bicliques.erase(bicliques.begin(), end);
    108 
    109     return std::move(bicliques);
    110 }
    111 
    112 /** ------------------------------------------------------------------------------------------------------------- *
    113  * @brief enumerateBicliques
    114  *
    115  * Adaptation of the MICA algorithm as described in "Consensus algorithms for the generation of all maximal
    116  * bicliques" by Alexe et. al. (2003). Note: this implementation considers all verticies in set A to be in
    117  * bipartition A and their adjacencies to be in B.
    118   ** ------------------------------------------------------------------------------------------------------------- */
    119 static BicliqueSet enumerateBicliques(const Graph & G, const VertexSet & A, const unsigned min) {
    120     using IntersectionSets = std::set<VertexSet>;
    121 
    122     IntersectionSets B1;
    123     VertexSet tmp;
    124     for (auto u : A) {
    125         if (in_degree(u, G) > 0) {
    126             tmp.reserve(in_degree(u, G));
    127             for (auto e : make_iterator_range(in_edges(u, G))) {
    128                 tmp.push_back(source(e, G));
    129             }
    130             if (tmp.size() >= min) {
    131                 std::sort(tmp.begin(), tmp.end());
    132                 B1.emplace(tmp.begin(), tmp.end());
    133             }
    134             tmp.clear();
    135         }
    136     }
    137 
    138     IntersectionSets B(B1);
    139 
    140     IntersectionSets Bi;
    141     for (auto i = B1.begin(); i != B1.end(); ++i) {
    142         for (auto j = i; ++j != B1.end(); ) {
    143             std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(tmp));
    144             if (tmp.size() >= min) {
    145                 if (B.count(tmp) == 0) {
    146                     Bi.emplace(tmp.begin(), tmp.end());
    147                 }
    148             }
    149             tmp.clear();
    150         }
    151     }
    152 
    153     for (;;) {
    154         if (Bi.empty()) {
    155             break;
    156         }
    157         B.insert(Bi.begin(), Bi.end());
    158         IntersectionSets Bk;
    159         for (auto i = B1.begin(); i != B1.end(); ++i) {
    160             for (auto j = Bi.begin(); j != Bi.end(); ++j) {
    161                 std::set_intersection(i->begin(), i->end(), j->begin(), j->end(), std::back_inserter(tmp));
    162                 if (tmp.size() >= min) {
    163                     if (B.count(tmp) == 0) {
    164                         Bk.emplace(tmp.begin(), tmp.end());
    165                     }
    166                 }
    167                 tmp.clear();
    168             }
    169         }
    170         Bi.swap(Bk);
    171     }
    172 
    173     BicliqueSet cliques;
    174     cliques.reserve(B.size());
    175     for (auto Bi = B.begin(); Bi != B.end(); ++Bi) {
    176         assert (Bi->size() >= min);
    177         VertexSet Ai(A);
    178         for (const Vertex u : *Bi) {
    179             VertexSet Aj;
    180             Aj.reserve(out_degree(u, G));
    181             for (auto e : make_iterator_range(out_edges(u, G))) {
    182                 Aj.push_back(target(e, G));
    183             }
    184             std::sort(Aj.begin(), Aj.end());
    185             VertexSet Ak;
    186             Ak.reserve(std::min(Ai.size(), Aj.size()));
    187             std::set_intersection(Ai.begin(), Ai.end(), Aj.begin(), Aj.end(), std::back_inserter(Ak));
    188             Ai.swap(Ak);
    189         }
    190         assert (Ai.size() > 0); // cannot happen if this algorithm is working correctly
    191         cliques.emplace_back(std::move(Ai), std::move(*Bi));
    192     }
    193     return std::move(cliques);
    194 }
    195 
    196 /** ------------------------------------------------------------------------------------------------------------- *
    197  * @brief removeUnhelpfulBicliques
    198  *
    199  * An intermediary vertex could have more than one outgoing edge but if any that are not directed to vertices in
    200  * the lower biclique, we'll need to compute that specific value anyway. Remove them from the clique set and if
    201  * there are not enough vertices in the biclique to make distribution profitable, eliminate the clique.
    202  ** ------------------------------------------------------------------------------------------------------------- */
    203 static BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques, const Graph & G) {
    204     for (auto ci = cliques.begin(); ci != cliques.end(); ) {
    205         const auto cardinalityA = std::get<0>(*ci).size();
    206         VertexSet & B = std::get<1>(*ci);
    207         for (auto bi = B.begin(); bi != B.end(); ) {
    208             if (G[*bi]->getNumUses() == cardinalityA) {
    209                 ++bi;
     621            I[i] = std::pow(std::get<side>(cliques[i]).size(), 2);
     622        }
     623
     624        // Determine our constraints
     625        for (unsigned i = 0; i != l; ++i) {
     626            for (unsigned j = i + 1; j != l; ++j) {
     627                if (intersects(std::get<side>(cliques[i]), std::get<side>(cliques[j]))) {
     628                    boost::add_edge(i, j, I);
     629                }
     630            }
     631        }
     632
     633        // Use the greedy algorithm to choose our independent set
     634        VertexSet selected;
     635        for (;;) {
     636            unsigned w = 0;
     637            Vertex u = 0;
     638            for (unsigned i = 0; i != l; ++i) {
     639                if (I[i] > w) {
     640                    w = I[i];
     641                    u = i;
     642                }
     643            }
     644            if (w < minimum) break;
     645            selected.push_back(u);
     646            I[u] = 0;
     647            for (auto v : make_iterator_range(adjacent_vertices(u, I))) {
     648                I[v] = 0;
     649            }
     650        }
     651
     652        // Sort the selected list and then remove the unselected cliques
     653        std::sort(selected.begin(), selected.end(), std::greater<Vertex>());
     654        auto end = cliques.end();
     655        for (const unsigned offset : selected) {
     656            end = cliques.erase(cliques.begin() + offset + 1, end) - 1;
     657        }
     658        cliques.erase(cliques.begin(), end);
     659
     660        return std::move(cliques);
     661    }
     662
     663    /** ------------------------------------------------------------------------------------------------------------- *
     664     * @brief removeUnhelpfulBicliques
     665     *
     666     * An intermediary vertex could have more than one outgoing edge but if any that are not directed to vertices in
     667     * the lower biclique, we'll need to compute that specific value anyway. Remove them from the clique set and if
     668     * there are not enough vertices in the biclique to make distribution profitable, eliminate the clique.
     669     ** ------------------------------------------------------------------------------------------------------------- */
     670    BicliqueSet && removeUnhelpfulBicliques(BicliqueSet && cliques) {
     671        for (auto ci = cliques.begin(); ci != cliques.end(); ) {
     672            const auto cardinalityA = std::get<0>(*ci).size();
     673            VertexSet & B = std::get<1>(*ci);
     674            for (auto bi = B.begin(); bi != B.end(); ) {
     675                if (out_degree(H[*bi], G) == cardinalityA) {
     676                    ++bi;
     677                } else {
     678                    bi = B.erase(bi);
     679                }
     680            }
     681            if (B.size() > 1) {
     682                ++ci;
    210683            } else {
    211                 bi = B.erase(bi);
    212             }
    213         }
    214         if (B.size() > 1) {
    215             ++ci;
    216         } else {
    217             ci = cliques.erase(ci);
    218         }
    219     }
    220     return std::move(cliques);
    221 }
    222 
    223 /** ------------------------------------------------------------------------------------------------------------- *
    224  * @brief safeDistributionSets
    225  ** ------------------------------------------------------------------------------------------------------------- */
    226 inline static DistributionSets safeDistributionSets(const Graph & G, const VertexSet & A) {
    227     DistributionSets T;
    228     BicliqueSet lowerSet = independentCliqueSets(removeUnhelpfulBicliques(enumerateBicliques(G, A, 1), G), false);
    229     for (Biclique & lower : lowerSet) {
    230         BicliqueSet upperSet = independentCliqueSets(enumerateBicliques(G, std::get<1>(lower), 2), true);
    231         for (Biclique & upper : upperSet) {
    232             T.emplace_back(std::move(std::get<1>(upper)), std::move(std::get<0>(upper)), std::get<0>(lower));
    233         }
    234     }
    235     return std::move(T);
    236 }
    237 
    238 /** ------------------------------------------------------------------------------------------------------------- *
    239  * @brief scopeDepthOf
    240  ** ------------------------------------------------------------------------------------------------------------- */
    241 inline unsigned scopeDepthOf(const PabloBlock * block) {
    242     unsigned depth = 0;
    243     for (; block; ++depth, block = block->getPredecessor());
    244     return depth;
    245 }
    246 
    247 /** ------------------------------------------------------------------------------------------------------------- *
    248  * @brief findInsertionPoint
    249  ** ------------------------------------------------------------------------------------------------------------- */
    250 inline PabloBlock * findInsertionPoint(const VertexSet & users, const Graph & G) {
    251     std::vector<PabloBlock *> scopes(0);
    252     scopes.reserve(users.size());
    253     for (Vertex u : users) {
    254         PabloBlock * const scope = cast<Statement>(G[u])->getParent(); assert (scope);
    255         if (std::find(scopes.begin(), scopes.end(), scope) == scopes.end()) {
    256             scopes.push_back(scope);
    257         }
    258     }
    259     while (scopes.size() > 1) {
    260         // Find the LCA of both scopes then add the LCA back to the list of scopes.
    261         PabloBlock * scope1 = scopes.back();
    262         scopes.pop_back();
    263         PabloBlock * scope2 = scopes.back();
    264         scopes.pop_back();
    265         assert (scope1 != scope2);
    266         unsigned depth1 = scopeDepthOf(scope1);
    267         unsigned depth2 = scopeDepthOf(scope2);
    268         // If one of these scopes is nested deeper than the other, scan upwards through
    269         // the scope tree until both scopes are at the same depth.
    270         while (depth1 > depth2) {
    271             scope1 = scope1->getPredecessor();
    272             --depth1;
    273         }
    274         while (depth1 < depth2) {
    275             scope2 = scope2->getPredecessor();
    276             --depth2;
    277         }
    278         assert (depth1 == depth2);
    279         // Then iteratively step backwards until we find a matching scopes; this must be
    280         // the LCA of our original pair.
    281         while (scope1 != scope2) {
    282             scope1 = scope1->getPredecessor();
    283             scope2 = scope2->getPredecessor();
    284         }
    285         assert (scope1 && scope2);
    286         if (std::find(scopes.begin(), scopes.end(), scope1) == scopes.end()) {
    287             scopes.push_back(scope1);
    288         }
    289     }
    290     assert (scopes.size() == 1);
    291     PabloBlock * const root = scopes.front();
    292     // Now that we know the common scope of these users, test which statement is the first to require it.
    293     flat_set<Statement *> usages;
    294     usages.reserve(users.size());
    295     for (Vertex u : users) {
    296         Statement * user = cast<Statement>(G[u]);
    297         PabloBlock * scope = user->getParent();
    298         while (scope != root) {
    299             assert (scope);
    300             user = scope->getBranch();
    301             scope = scope->getPredecessor();
    302         }
    303         usages.insert(user);
    304     }
    305     Statement * ip = nullptr;
    306     for (Statement * stmt : *root) {
    307         if (usages.count(stmt)) {
    308             ip = stmt->getPrevNode();
    309             break;
    310         }
    311     }
    312     assert (ip);
    313     root->setInsertPoint(ip);
    314     return root;
    315 }
    316 
    317 /** ------------------------------------------------------------------------------------------------------------- *
    318  * @brief computeDistributionGraph
    319  ** ------------------------------------------------------------------------------------------------------------- */
    320 static inline void computeDistributionGraph(Variadic * const expr, Graph & G, VertexSet & A) {
    321 
    322     TypeId outerTypeId = expr->getClassTypeId();
    323     TypeId innerTypeId = (outerTypeId == TypeId::And) ? TypeId::Or : TypeId::And;
    324 
    325     assert (isa<And>(expr) || isa<Or>(expr));
    326 
    327     Map M;
    328     for (unsigned i = 0; i != expr->getNumOperands(); ++i) {
    329         PabloAST * const op = expr->getOperand(i);
    330         if (op->getClassTypeId() == innerTypeId) {
    331             bool distributable = true;
    332             for (PabloAST * user : op->users()) {
    333                 // Early check to see whether it'd be beneficial to distribute it. If this fails, we'd have
    334                 // to compute the operand's value anyway, so just ignore this operand.
    335                 if (user->getClassTypeId() != outerTypeId) {
    336                     distributable = false;
    337                     break;
    338                 }
    339             }
    340             if (distributable) {
    341                 const Vertex u = add_vertex(op, G);
    342                 for (PabloAST * user : op->users()) {
    343                     const auto f = M.find(user);
    344                     Vertex v = 0;
    345                     if (LLVM_LIKELY(f != M.end())) {
    346                         v = f->second;
    347                     } else {
    348                         v = add_vertex(user, G);
    349                         M.emplace(user, v);
    350                         A.push_back(v);
    351                     }
    352                     add_edge(u, v, G);
    353                 }
    354                 for (PabloAST * input : *cast<Variadic>(op)) {
    355                     const auto f = M.find(input);
    356                     Vertex v = 0;
    357                     if (f != M.end()) {
    358                         v = f->second;
    359                     } else {
    360                         v = add_vertex(input, G);
    361                         M.emplace(input, v);
    362                     }
    363                     add_edge(v, u, G);
    364                 }
    365             }
    366         }
    367     }
    368 }
    369 
    370 /** ------------------------------------------------------------------------------------------------------------- *
    371  * @brief distribute
    372  *
    373  * Based on the knowledge that:
    374  *
    375  *   (P ∧ Q) √ (P ∧ R) ⇔ P ∧ (Q √ R) and (P √ Q) ∧ (P √ R) ⇔ P √ (Q ∧ R)
    376  *
    377  * Try to eliminate some of the unnecessary operations from the current Variadic expression.
    378  ** ------------------------------------------------------------------------------------------------------------- */
    379 inline void DistributivePass::distribute(Variadic * const var) {
    380 
    381 
    382 
    383     assert (isa<And>(var) || isa<Or>(var));
     684                ci = cliques.erase(ci);
     685            }
     686        }
     687        return std::move(cliques);
     688    }
     689
     690    /** ------------------------------------------------------------------------------------------------------------- *
     691     * @brief addTemporary
     692     ** ------------------------------------------------------------------------------------------------------------- */
     693    Vertex makeVertex(const TypeId typeId, PabloAST * const expr = nullptr) {
     694        return add_vertex(std::make_pair(expr, typeId), G);
     695    }
     696
     697    /** ------------------------------------------------------------------------------------------------------------- *
     698     * @brief addExpression
     699     ** ------------------------------------------------------------------------------------------------------------- */
     700    Vertex addExpression(PabloAST * const expr) {
     701        const auto f = M.find(expr);
     702        if (LLVM_LIKELY(f != M.end())) {
     703            return f->second;
     704        }
     705        TypeId typeId = TypeId::Var;
     706        if (isa<Zeroes>(expr)) {
     707            typeId = TypeId::Zeroes;
     708        } else if (isa<Ones>(expr)) {
     709            typeId = TypeId::Ones;
     710        }
     711        const auto u = makeVertex(typeId, expr);
     712        M.emplace(expr, u);
     713        return u;
     714    }
     715
     716    /** ------------------------------------------------------------------------------------------------------------- *
     717     * @brief addStatement
     718     ** ------------------------------------------------------------------------------------------------------------- */
     719    void addStatement(Statement * const stmt) {
     720        assert (M.count(stmt) == 0);
     721        const auto typeId = stmt->getClassTypeId();
     722        const auto u = makeVertex(typeId, stmt);
     723        M.emplace(stmt, u);
     724        for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     725            PabloAST * const op = stmt->getOperand(i);
     726            if (LLVM_UNLIKELY(isa<Integer>(op) || isa<String>(op))) {
     727                continue;
     728            }
     729            const auto v = addExpression(op);
     730            add_edge(v, u, op, G);
     731            if (LLVM_UNLIKELY(isa<Not>(op))) {
     732                PabloAST * const negated = cast<Not>(op)->getExpr();
     733                add_edge(addExpression(negated), v, negated, G);
     734            }
     735        }
     736    }
     737
     738    /** ------------------------------------------------------------------------------------------------------------- *
     739     * @brief intersects
     740     ** ------------------------------------------------------------------------------------------------------------- */
     741    template <class Type>
     742    inline bool intersects(Type & A, Type & B) {
     743        auto first1 = A.begin(), last1 = A.end();
     744        auto first2 = B.begin(), last2 = B.end();
     745        while (first1 != last1 && first2 != last2) {
     746            if (*first1 < *first2) {
     747                ++first1;
     748            } else if (*first2 < *first1) {
     749                ++first2;
     750            } else {
     751                return true;
     752            }
     753        }
     754        return false;
     755    }
     756
     757    TypeId getType(const Vertex u) {
     758        return std::get<1>(G[u]);
     759    }
     760
     761    void setType(const Vertex u, const TypeId typeId) {
     762        std::get<1>(G[u]) = typeId;
     763    }
     764
     765    static bool isIdentityRelation(const TypeId a, const TypeId b) {
     766        return !((a == TypeId::Zeroes) ^ (b == TypeId::Or));
     767    }
     768
     769    static bool isAssociative(const TypeId typeId) {
     770        return (isDistributive(typeId) || typeId == TypeId::Xor);
     771    }
     772
     773    static bool isDistributive(const TypeId typeId) {
     774        return (typeId == TypeId::And || typeId == TypeId::Or);
     775    }
     776
     777    static TypeId oppositeTypeId(const TypeId typeId) {
     778        assert (isDistributive(typeId));
     779        return (typeId == TypeId::And) ? TypeId::Or : TypeId::And;
     780    }
     781
     782    void add_edge(const Vertex u, const Vertex v, PabloAST * const value, Graph & G) {
     783        G[std::get<0>(boost::add_edge(u, v, G))] = value;
     784    }
     785
     786private:
    384787
    385788    Graph G;
    386     VertexSet A;
    387 
    388     std::vector<Variadic *> Q = {var};
    389 
    390     while (!Q.empty()) {
    391         Variadic * expr = CanonicalizeDFG::canonicalize(Q.back()); Q.pop_back();
    392         PabloAST * const replacement = Simplifier::fold(expr, expr->getParent());
    393         if (LLVM_UNLIKELY(replacement != nullptr)) {
    394             expr->replaceWith(replacement, true, true);
    395             if (LLVM_UNLIKELY(isa<Variadic>(replacement))) {
    396                 Q.push_back(cast<Variadic>(replacement));
    397             }
    398             continue;
    399         }
    400 
    401         if (LLVM_LIKELY(isa<And>(expr) || isa<Or>(expr))) {
    402 
    403             computeDistributionGraph(expr, G, A);
    404 
    405             // If we found no potential opportunities then we cannot apply the distribution law to any part of G.
    406             if (num_vertices(G) == 0) {
    407                 assert (A.empty());
    408                 continue;
    409             }
    410 
    411             const auto S = safeDistributionSets(G, A);
    412             if (S.empty()) {
    413                 G.clear();
    414                 A.clear();
    415                 continue;
    416             }
    417 
    418             Q.push_back(expr);
    419 
    420             for (const DistributionSet & set : S) {
    421 
    422                 // Each distribution tuple consists of the sources, intermediary, and sink nodes.
    423                 const VertexSet & sources = std::get<0>(set);
    424                 assert (sources.size() > 0);
    425                 const VertexSet & intermediary = std::get<1>(set);
    426                 assert (intermediary.size() > 1);
    427                 const VertexSet & sinks = std::get<2>(set);
    428                 assert (sinks.size() > 0);
    429 
    430                 for (const Vertex u : intermediary) {
    431                     for (const Vertex v : sources) {
    432                         cast<Variadic>(G[u])->deleteOperand(G[v]);
    433                     }
    434                 }
    435                 for (const Vertex u : sinks) {
    436                     for (const Vertex v : intermediary) {
    437                         cast<Variadic>(G[u])->deleteOperand(G[v]);
    438                     }
    439                 }
    440 
    441                 PabloBlock * const block = findInsertionPoint(sinks, G);
    442                 Variadic * innerOp = nullptr;
    443                 Variadic * outerOp = nullptr;
    444                 if (isa<And>(expr)) {
    445                     outerOp = block->createAnd(intermediary.size());
    446                     innerOp = block->createOr(sources.size() + 1);
    447                 } else {
    448                     outerOp = block->createOr(intermediary.size());
    449                     innerOp = block->createAnd(sources.size() + 1);
    450                 }
    451                 for (const Vertex v : intermediary) {
    452                     outerOp->addOperand(G[v]);
    453                 }
    454                 for (const Vertex v : sources) {
    455                     innerOp->addOperand(G[v]);
    456                 }
    457                 for (const Vertex u : sinks) {
    458                     cast<Variadic>(G[u])->addOperand(innerOp);
    459                 }
    460                 innerOp->addOperand(outerOp);
    461                 // Push our newly constructed ops into the Q
    462                 Q.push_back(innerOp);
    463                 Q.push_back(outerOp);
    464 
    465                 unmodified = false;
    466             }
    467 
    468             G.clear();
    469             A.clear();
    470         }
    471     }
    472 
    473 }
    474 
    475 /** ------------------------------------------------------------------------------------------------------------- *
    476  * @brief distribute
    477  ** ------------------------------------------------------------------------------------------------------------- */
    478 void DistributivePass::distribute(PabloBlock * const block) {
    479     Statement * stmt = block->front();
    480     while (stmt) {
    481         Statement * next = stmt->getNextNode();
    482         if (isa<If>(stmt) || isa<While>(stmt)) {
    483             distribute(isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody());
    484         } else if (isa<And>(stmt) || isa<Or>(stmt)) {
    485             distribute(cast<Variadic>(stmt));
    486         }
    487         stmt = next;
    488     }
    489 }
     789    flat_map<pablo::PabloAST *, Vertex> M;
     790
     791    DistributionGraph H;
     792    flat_map<Vertex, DistributionVertex> D;
     793
     794};
    490795
    491796/** ------------------------------------------------------------------------------------------------------------- *
    492797 * @brief optimize
    493798 ** ------------------------------------------------------------------------------------------------------------- */
    494 bool DistributivePass::optimize(PabloFunction & function) {
    495     DistributivePass dp;
    496     unsigned rounds = 0;
    497     for (;;) {
    498         dp.unmodified = true;
    499         dp.distribute(function.getEntryBlock());
    500         if (dp.unmodified) {
    501             break;
    502         }
    503         ++rounds;
    504         #ifndef NDEBUG
    505         PabloVerifier::verify(function, "post-distribution");
    506         #endif
    507         Simplifier::optimize(function);
    508     }
    509     return rounds > 0;
     799bool DistributivePass::optimize(PabloKernel * const kernel) {
     800    PassContainer C;
     801    C.run(kernel->getEntryBlock());
     802    return true;
    510803}
    511804
    512 
    513805}
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/distributivepass.h

    r4927 r5464  
    1 #ifndef DISTRIBUTIVEPASS_H
    2 #define DISTRIBUTIVEPASS_H
     1#ifndef PABLO_DISTRIBUTIVEPASS_H
     2#define PABLO_DISTRIBUTIVEPASS_H
    33
    44namespace pablo {
    55
    6 class PabloFunction;
    7 class PabloBlock;
    8 class Variadic;
     6class PabloKernel;
    97
    108class DistributivePass {
    119public:
    12     static bool optimize(PabloFunction & function);
    13 protected:
    14     void distribute(PabloBlock * const block);
    15     void distribute(Variadic * const var);
    16     DistributivePass() = default;
    17 private:
    18     bool unmodified;
     10    static bool optimize(pablo::PabloKernel * const kernel);
    1911};
    2012
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5454 r5464  
    1414#include <llvm/IR/Module.h>
    1515
    16 using namespace pablo;
    1716using namespace kernel;
    1817using namespace parabix;
     
    2928    return false;
    3029}
     30
     31namespace pablo {
    3132
    3233Var * PabloKernel::getInputStreamVar(const std::string & name) {
     
    209210}
    210211
     212}
  • icGREP/icgrep-devel/icgrep/pablo/pablo_toolchain.cpp

    r5454 r5464  
    99#include <pablo/optimizers/pablo_simplifier.hpp>
    1010#include <pablo/optimizers/codemotionpass.h>
    11 #include <pablo/passes/flattenassociativedfg.h>
     11#include <pablo/optimizers/distributivepass.h>
    1212#include <pablo/passes/flattenif.hpp>
    13 #include <pablo/passes/factorizedfg.h>
    14 #ifdef ENABLE_MULTIPLEXING
    15 #include <pablo/optimizers/pablo_automultiplexing.hpp>
    16 #include <pablo/optimizers/pablo_bddminimization.h>
    17 #include <pablo/optimizers/booleanreassociationpass.h>
    18 #include <pablo/optimizers/distributivepass.h>
    19 #include <pablo/optimizers/schedulingprepass.h>
    20 #endif
    2113#include <pablo/analysis/pabloverifier.hpp>
    2214#include <pablo/printer_pablos.h>
     
    2416#include <llvm/Support/FileSystem.h>
    2517#include <llvm/Support/raw_ostream.h>
    26 #ifdef PRINT_TIMING_INFORMATION
    27 #include <hrtime.h>
    28 #endif
    29 
    3018
    3119using namespace llvm;
     
    4331DebugOptions(cl::values(clEnumVal(ShowPablo, "Print generated Pablo code"),
    4432                        clEnumVal(ShowOptimizedPablo, "Print optimizeed Pablo code"),
    45                         clEnumVal(ShowUnloweredPablo, "Print Pablo code prior to lowering."),
    4633                        clEnumVal(DumpTrace, "Generate dynamic traces of executed Pablo assignments."),
    4734                        clEnumValEnd), cl::cat(PabloOptions));
     
    5340    PabloOptimizationsOptions(cl::values(clEnumVal(DisableSimplification, "Disable Pablo Simplification pass (not recommended)"),
    5441                                         clEnumVal(DisableCodeMotion, "Moves statements into the innermost legal If-scope and moves invariants out of While-loops."),
    55 #ifdef ENABLE_MULTIPLEXING
    56                                          clEnumVal(EnableMultiplexing, "combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."),
    57                                          clEnumVal(EnableLowering, "coalesce associative functions prior to optimization passes."),
    58                                          clEnumVal(EnablePreDistribution, "apply distribution law optimization prior to multiplexing."),
    59                                          clEnumVal(EnablePostDistribution, "apply distribution law optimization after multiplexing."),
    60                                          clEnumVal(EnablePrePassScheduling, "apply pre-pass scheduling prior to LLVM IR generation."),
    61 #endif                                         
    62                             clEnumValEnd), cl::cat(PabloOptions));
     42                                         clEnumVal(EnableDistribution, "apply distribution law optimization."),
     43                                         clEnumValEnd), cl::cat(PabloOptions));
    6344
    6445bool DebugOptionIsSet(PabloDebugFlags flag) {return DebugOptions.isSet(flag);}
    6546   
    66 
    67 
    68 #ifdef PRINT_TIMING_INFORMATION
    69 #define READ_CYCLE_COUNTER(name) name = read_cycle_counter();
    70 #else
    71 #define READ_CYCLE_COUNTER(name)
    72 #endif
    73 
    74 #ifdef PRINT_TIMING_INFORMATION
    75 unsigned COUNT_STATEMENTS(const PabloKernel * const entry) {
    76     std::stack<const Statement *> scope;
    77     unsigned statements = 0;
    78     // Scan through and collect all the advances, scanthrus and matchstars ...
    79     for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
    80         while ( stmt ) {
    81             ++statements;
    82             if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
    83                 // Set the next statement to be the first statement of the inner scope and push the
    84                 // next statement of the current statement into the scope stack.
    85                 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
    86                 scope.push(stmt->getNextNode());
    87                 stmt = nested->front();
    88                 assert (stmt);
    89                 continue;
    90             }
    91             stmt = stmt->getNextNode();
    92         }
    93         if (scope.empty()) {
    94             break;
    95         }
    96         stmt = scope.top();
    97         scope.pop();
    98     }
    99     return statements;
    100 }
    101 
    102 unsigned COUNT_ADVANCES(const PabloKernel * const entry) {
    103 
    104     std::stack<const Statement *> scope;
    105     unsigned advances = 0;
    106 
    107     // Scan through and collect all the advances, scanthrus and matchstars ...
    108     for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
    109         while ( stmt ) {
    110             if (isa<Advance>(stmt)) {
    111                 ++advances;
    112             }
    113             else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
    114                 // Set the next statement to be the first statement of the inner scope and push the
    115                 // next statement of the current statement into the scope stack.
    116                 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
    117                 scope.push(stmt->getNextNode());
    118                 stmt = nested->front();
    119                 assert (stmt);
    120                 continue;
    121             }
    122             stmt = stmt->getNextNode();
    123         }
    124         if (scope.empty()) {
    125             break;
    126         }
    127         stmt = scope.top();
    128         scope.pop();
    129     }
    130     return advances;
    131 }
    132 
    133 using DistributionMap = boost::container::flat_map<unsigned, unsigned>;
    134 
    135 DistributionMap SUMMARIZE_VARIADIC_DISTRIBUTION(const PabloKernel * const entry) {
    136     std::stack<const Statement *> scope;
    137     DistributionMap distribution;
    138     // Scan through and collect all the advances, scanthrus and matchstars ...
    139     for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
    140         while ( stmt ) {
    141             if (isa<Variadic>(stmt)) {
    142                 auto f = distribution.find(stmt->getNumOperands());
    143                 if (f == distribution.end()) {
    144                     distribution.emplace(stmt->getNumOperands(), 1);
    145                 } else {
    146                     f->second += 1;
    147                 }
    148             }
    149             else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
    150                 // Set the next statement to be the first statement of the inner scope and push the
    151                 // next statement of the current statement into the scope stack.
    152                 const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
    153                 scope.push(stmt->getNextNode());
    154                 stmt = nested->front();
    155                 assert (stmt);
    156                 continue;
    157             }
    158             stmt = stmt->getNextNode();
    159         }
    160         if (scope.empty()) {
    161             break;
    162         }
    163         stmt = scope.top();
    164         scope.pop();
    165     }
    166     return distribution;
    167 }
    168 #endif
    16947
    17048void pablo_function_passes(PabloKernel * kernel) {
     
    18159
    18260    // Scan through the pablo code and perform DCE and CSE
    183 
    184 #ifdef PRINT_TIMING_INFORMATION
    185     timestamp_t simplification_start = 0, simplification_end = 0;
    186     timestamp_t flattenif_start = 0, flattenif_end = 0;
    187     timestamp_t coalescing_start = 0, coalescing_end = 0;
    188     timestamp_t sinking_start = 0, sinking_end = 0;
    189     timestamp_t pre_distribution_start = 0, pre_distribution_end = 0;
    190     timestamp_t multiplexing_start = 0, multiplexing_end = 0;
    191     timestamp_t post_distribution_start = 0, post_distribution_end = 0;
    192     timestamp_t lowering_start = 0, lowering_end = 0;
    193     timestamp_t scheduling_start = 0, scheduling_end = 0;
    194     DistributionMap distribution;
    195     const timestamp_t optimization_start = read_cycle_counter();
    196 #endif
    19761    if (LLVM_LIKELY(!PabloOptimizationsOptions.isSet(DisableSimplification))) {
    198         READ_CYCLE_COUNTER(simplification_start);
    19962        Simplifier::optimize(kernel);
    200         READ_CYCLE_COUNTER(simplification_end);
    20163    }
    20264    if (Flatten){
    203         READ_CYCLE_COUNTER(flattenif_start);
    20465        FlattenIf::transform(kernel);
    205         READ_CYCLE_COUNTER(flattenif_end);
    20666    }
    207 #ifdef ENABLE_MULTIPLEXING
    208 //    if (PabloOptimizationsOptions.isSet(EnableLowering) || PabloOptimizationsOptions.isSet(EnablePreDistribution) || PabloOptimizationsOptions.isSet(EnablePostDistribution)) {
    209 //        READ_CYCLE_COUNTER(coalescing_start);
    210 //        CanonicalizeDFG::transform(kernel);
    211 //        READ_CYCLE_COUNTER(coalescing_end);
    212 //    }
    213     if (PabloOptimizationsOptions.isSet(EnablePreDistribution)) {
    214         READ_CYCLE_COUNTER(pre_distribution_start);
    215         BooleanReassociationPass::optimize(kernel);
    216         READ_CYCLE_COUNTER(pre_distribution_end);
     67    if (LLVM_LIKELY(!PabloOptimizationsOptions.isSet(DisableCodeMotion))) {
     68        CodeMotionPass::optimize(kernel);
    21769    }
    218     if (PabloOptimizationsOptions.isSet(EnableMultiplexing)) {
    219         READ_CYCLE_COUNTER(multiplexing_start);
    220         MultiplexingPass::optimize(kernel);
    221         READ_CYCLE_COUNTER(multiplexing_end);
    222 //        if (PabloOptimizationsOptions.isSet(EnableLowering) || PabloOptimizationsOptions.isSet(EnablePreDistribution) || PabloOptimizationsOptions.isSet(EnablePostDistribution)) {
    223 //            CanonicalizeDFG::transform(kernel);
    224 //        }
     70    if (PabloOptimizationsOptions.isSet(EnableDistribution)) {
     71        DistributivePass::optimize(kernel);
    22572    }
    226     if (PabloOptimizationsOptions.isSet(EnablePostDistribution)) {
    227         READ_CYCLE_COUNTER(post_distribution_start);
    228         BooleanReassociationPass::optimize(kernel);
    229         READ_CYCLE_COUNTER(post_distribution_end);
    230     }
    231 #endif
    232     if (LLVM_LIKELY(!PabloOptimizationsOptions.isSet(DisableCodeMotion))) {
    233         READ_CYCLE_COUNTER(sinking_start);
    234         CodeMotionPass::optimize(kernel);
    235         READ_CYCLE_COUNTER(sinking_end);
    236     }
    237 #ifdef ENABLE_MULTIPLEXING
    238     if (DebugOptions.isSet(ShowUnloweredPablo)) {
    239         //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
    240         errs() << "Unlowered Pablo AST:\n";
    241         PabloPrinter::print(kernel, errs());
    242     }
    243     #ifdef PRINT_TIMING_INFORMATION
    244     distribution = SUMMARIZE_VARIADIC_DISTRIBUTION(function);
    245     #endif
    246 //    if (PabloOptimizationsOptions.isSet(EnableLowering) || PabloOptimizationsOptions.isSet(EnablePreDistribution) || PabloOptimizationsOptions.isSet(EnablePostDistribution)) {
    247 //        READ_CYCLE_COUNTER(lowering_start);
    248 //        FactorizeDFG::transform(kernel);
    249 //        READ_CYCLE_COUNTER(lowering_end);
    250 //    }
    251 //    if (PabloOptimizationsOptions.isSet(EnablePrePassScheduling)) {
    252 //        READ_CYCLE_COUNTER(scheduling_start);
    253 //        SchedulingPrePass::optimize(kernel);
    254 //        READ_CYCLE_COUNTER(scheduling_end);
    255 //    }
    256 #endif
    257 #ifdef PRINT_TIMING_INFORMATION
    258     const timestamp_t optimization_end = read_cycle_counter();
    259 #endif
    26073    if (DebugOptions.isSet(ShowOptimizedPablo)) {
    26174        if (PabloOutputFilename.empty()) {
     
    26982        }
    27083    }
    271 #ifdef PRINT_TIMING_INFORMATION
    272     errs() << "PABLO OPTIMIZATION TIME: " << (optimization_end - optimization_start) << "\n";
    273     errs() << "  SIMPLIFICATION TIME: " << (simplification_end - simplification_start) << "\n";
    274     errs() << "  COALESCING TIME: " << (coalescing_end - coalescing_start) << "\n";
    275     errs() << "  SINKING TIME: " << (sinking_end - sinking_start) << "\n";
    276     errs() << "  PRE-DISTRIBUTION TIME: " << (pre_distribution_end - pre_distribution_start) << "\n";
    277     errs() << "  MULTIPLEXING TIME: " << (multiplexing_end - multiplexing_start) << "\n";
    278     errs() << "  LOWERING TIME: " << (lowering_end - lowering_start) << "\n";
    279     errs() << "  FLATTENIF TIME: " << (flattenif_end - flattenif_start) << "\n";
    280     errs() << "  POST-DISTRIBUTION TIME: " << (post_distribution_end - post_distribution_start) << "\n";
    281     errs() << "  SCHEDULING TIME: " << (scheduling_end - scheduling_start) << "\n";
    282     errs() << "PABLO STATEMENTS: " << COUNT_STATEMENTS(function) << "\n";
    283     errs() << "PABLO ADVANCES: " << COUNT_ADVANCES(function) << "\n";
    284     errs() << "PRE-LOWERING VARIADIC DISTRIBUTION: ";
    285     bool join = false;
    286     for (auto dist : distribution) {
    287         if (join) {
    288             errs() << ';';
    289         }
    290         errs() << dist.first << '|' << dist.second;
    291         join = true;
    292     }
    293     errs() << "\n";
    294 #endif
    29584}
    29685
  • icGREP/icgrep-devel/icgrep/pablo/pablo_toolchain.h

    r5454 r5464  
    1414
    1515enum PabloDebugFlags {
    16     ShowPablo, ShowOptimizedPablo, ShowUnloweredPablo, DumpTrace,
     16    ShowPablo, ShowOptimizedPablo, DumpTrace,
    1717};
    1818
    1919enum PabloCompilationFlags {
    20     DisableSimplification, DisableCodeMotion,
    21     EnableMultiplexing, EnableLowering, EnablePreDistribution, EnablePostDistribution, EnablePrePassScheduling
     20    DisableSimplification, DisableCodeMotion, EnableDistribution
    2221};
    2322   
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5418 r5464  
    644644    RE * propValueRe = RE_Parser::parse("^" + regexValue + "$", fModeFlagSet, mReSyntax);
    645645    GrepEngine engine;
    646     engine.grepCodeGen("NamePattern", { propValueRe }, false, false, GrepSource::Internal, GrepType::PropertyValue);
     646    engine.grepCodeGen({ propValueRe }, false, false, GrepSource::Internal, GrepType::PropertyValue);
    647647    const auto matches = engine.grepPropertyValues(propName);
    648648    if (matches.empty()) {
     
    677677   
    678678    GrepEngine engine;
    679     engine.grepCodeGen("NamePattern", { embedded }, false, false, GrepSource::Internal, GrepType::NameExpression);
     679    engine.grepCodeGen({ embedded }, false, false, GrepSource::Internal, GrepType::NameExpression);
    680680    CC * codepoints = engine.grepCodepoints();
    681681   
  • icGREP/icgrep-devel/icgrep/toolchain/NVPTXDriver.cpp

    r5461 r5464  
    88#include <IR_Gen/idisa_target.h>
    99#include <kernels/kernel_builder.h>
     10#include <kernels/kernel.h>
     11#include <llvm/Transforms/Scalar.h>
     12#include <llvm/Transforms/Utils/Local.h>
    1013#include <toolchain/toolchain.h>
    11 #include <IR_Gen/llvm2ptx.h>
    12 #include <llvm/Transforms/Scalar.h>
    13 
     14#include <toolchain/pipeline.h>
     15#include <llvm/Analysis/TargetLibraryInfo.h>
     16#include <llvm/CodeGen/MIRParser/MIRParser.h>
     17#include <llvm/IR/LegacyPassManager.h>
     18#include <llvm/IR/Module.h>
     19#include <llvm/Support/FileSystem.h>
     20#include <llvm/Support/TargetRegistry.h>
     21#include <llvm/Support/TargetSelect.h>
     22#include <llvm/Support/ToolOutputFile.h>
     23#include <llvm/Target/TargetMachine.h>
    1424
    1525using namespace llvm;
    16 using namespace parabix;
    17 
    18 using Kernel = kernel::Kernel;
    19 using KernelBuilder = kernel::KernelBuilder;
    20 
     26
     27using StreamSetBuffer = parabix::StreamSetBuffer;
    2128
    2229NVPTXDriver::NVPTXDriver(std::string && moduleName)
    23 : mContext(new llvm::LLVMContext())
    24 , mMainModule(new Module(moduleName, *mContext))
    25 , iBuilder(nullptr)
    26 , mTarget(nullptr)
    27 , mEngine(nullptr) {
     30: Driver(std::move(moduleName)) {
    2831
    2932    InitializeAllTargets();
     
    3235    InitializeAllAsmParsers();
    3336
    34     PassRegistry *Registry = PassRegistry::getPassRegistry();
     37    PassRegistry * Registry = PassRegistry::getPassRegistry();
    3538    initializeCore(*Registry);
    3639    initializeCodeGen(*Registry);
     
    4144    mMainModule->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
    4245    mMainModule->setTargetTriple("nvptx64-nvidia-cuda");
    43     codegen::BlockSize = 64;
    4446
    4547    iBuilder.reset(IDISA::GetIDISA_GPU_Builder(*mContext));
     
    4850}
    4951
    50 ExternalBuffer * NVPTXDriver::addExternalBuffer(std::unique_ptr<ExternalBuffer> b) {
    51     mOwnedBuffers.emplace_back(std::move(b));
    52     return cast<ExternalBuffer>(mOwnedBuffers.back().get());
    53 }
    54 
    55 StreamSetBuffer * NVPTXDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
    56     b->allocateBuffer(iBuilder);
    57     mOwnedBuffers.emplace_back(std::move(b));
    58     return mOwnedBuffers.back().get();
    59 }
    60 
    61 kernel::Kernel * NVPTXDriver::addKernelInstance(std::unique_ptr<kernel::Kernel> kb) {
    62     mOwnedKernels.emplace_back(std::move(kb));
    63     return mOwnedKernels.back().get();
    64 }
    65 
    66 void NVPTXDriver::addKernelCall(Kernel & kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
    67     assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
    68     mPipeline.emplace_back(&kb);
    69     kb.bindPorts(inputs, outputs);
    70     kb.setModule(iBuilder, mMainModule);
    71 }
    72 
    73 void NVPTXDriver::makeKernelCall(Kernel * kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
     52void NVPTXDriver::makeKernelCall(kernel::Kernel * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
    7453    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
    75     mPipeline.emplace_back(kb);   
     54    mPipeline.emplace_back(kb);
    7655    kb->bindPorts(inputs, outputs);
    7756    kb->setModule(iBuilder, mMainModule);
     
    11190}
    11291
     92Function * NVPTXDriver::addLinkFunction(Module *, llvm::StringRef, FunctionType *, void *) const {
     93    report_fatal_error("NVPTX does not support linked functions");
     94}
     95
     96
     97static int llvm2ptx(Module * M, std::string PTXFilename) {
     98
     99    std::unique_ptr<MIRParser> MIR;
     100    Triple TheTriple(M->getTargetTriple());
     101
     102    if (TheTriple.getTriple().empty())
     103        TheTriple.setTriple(sys::getDefaultTargetTriple());
     104
     105    // Get the target specific parser.
     106    std::string Error;
     107    const auto TheTarget = TargetRegistry::lookupTarget(codegen::MArch, TheTriple, Error);
     108    if (!TheTarget) {
     109        report_fatal_error(Error);
     110    }
     111
     112    const auto CPUStr = codegen::getCPUStr();
     113    const auto FeaturesStr = codegen::getFeaturesStr();
     114
     115    std::unique_ptr<TargetMachine> Target(
     116                TheTarget->createTargetMachine(TheTriple.getTriple(), CPUStr, FeaturesStr,
     117                                               codegen::Options, codegen::RelocModel, codegen::CMModel, codegen::OptLevel));
     118
     119    assert(Target && "Could not allocate target machine!");
     120
     121    // Figure out where we are going to send the output.
     122    std::error_code EC;
     123    sys::fs::OpenFlags OpenFlags = sys::fs::F_None | sys::fs::F_Text;
     124    std::unique_ptr<tool_output_file> Out = llvm::make_unique<tool_output_file>(PTXFilename, EC, OpenFlags);
     125    if (EC) {
     126        errs() << EC.message() << '\n';
     127        return 1;
     128    }
     129
     130    // Build up all of the passes that we want to do to the module.
     131    legacy::PassManager PM;
     132
     133    // Add an appropriate TargetLibraryInfo pass for the module's triple.
     134    TargetLibraryInfoImpl TLII(Triple(M->getTargetTriple()));
     135
     136    PM.add(new TargetLibraryInfoWrapperPass(TLII));
     137
     138    // Add the target data from the target machine, if it exists, or the module.
     139    M->setDataLayout(Target->createDataLayout());
     140
     141    // Override function attributes based on CPUStr, FeaturesStr, and command line
     142    // flags.
     143    codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M);
     144
     145    {
     146        raw_pwrite_stream *OS = &Out->os();
     147
     148        AnalysisID StartBeforeID = nullptr;
     149        AnalysisID StartAfterID = nullptr;
     150        AnalysisID StopAfterID = nullptr;
     151        const PassRegistry *PR = PassRegistry::getPassRegistry();
     152        if (!codegen::RunPass.empty()) {
     153            if (!codegen::StartAfter.empty() || !codegen::StopAfter.empty()) {
     154                errs() << "start-after and/or stop-after passes are redundant when run-pass is specified.\n";
     155                return 1;
     156            }
     157            const PassInfo *PI = PR->getPassInfo(codegen::RunPass);
     158            if (!PI) {
     159                errs() << "run-pass pass is not registered.\n";
     160                return 1;
     161            }
     162            StopAfterID = StartBeforeID = PI->getTypeInfo();
     163        } else {
     164            if (!codegen::StartAfter.empty()) {
     165                const PassInfo *PI = PR->getPassInfo(codegen::StartAfter);
     166                if (!PI) {
     167                    errs() << "start-after pass is not registered.\n";
     168                    return 1;
     169                }
     170                StartAfterID = PI->getTypeInfo();
     171            }
     172            if (!codegen::StopAfter.empty()) {
     173                const PassInfo *PI = PR->getPassInfo(codegen::StopAfter);
     174                if (!PI) {
     175                    errs() << "stop-after pass is not registered.\n";
     176                    return 1;
     177                }
     178                StopAfterID = PI->getTypeInfo();
     179            }
     180        }
     181
     182        // Ask the target to add backend passes as necessary.
     183        if (Target->addPassesToEmitFile(PM, *OS, codegen::FileType, false, StartBeforeID,
     184                                        StartAfterID, StopAfterID, MIR.get())) {
     185            errs() << " target does not support generation of this file type!\n";
     186            return 1;
     187        }
     188
     189        PM.run(*M);
     190    }
     191    // Declare success.
     192    Out->keep();
     193
     194    return 0;
     195}
     196
    113197void NVPTXDriver::finalizeAndCompile(Function * mainFunc, std::string PTXFilename) {
    114198
     
    134218    PM.run(*mMainModule); 
    135219
    136     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR)))
    137             mMainModule->dump();
     220    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
     221        mMainModule->dump();
     222    }
    138223
    139224    llvm2ptx(mMainModule, PTXFilename);
    140225}
    141226
    142 const std::unique_ptr<kernel::KernelBuilder> & NVPTXDriver::getBuilder() {
    143     return iBuilder;
    144 }
    145 
    146227NVPTXDriver::~NVPTXDriver() {
    147228}
  • icGREP/icgrep-devel/icgrep/toolchain/NVPTXDriver.h

    r5462 r5464  
    77#ifndef NVPTXDRIVER_H
    88#define NVPTXDRIVER_H
    9 #include <string>
    10 #include <IR_Gen/FunctionTypeBuilder.h>
    11 #include <kernels/kernel.h>
    12 #include <kernels/streamset.h>
    139
    14 namespace llvm { class ExecutionEngine; }
    15 namespace llvm { class Function; }
    16 namespace llvm { class Module; }
    17 namespace llvm { class TargetMachine; }
    18 namespace llvm { class formatted_raw_ostream; }
    19 namespace llvm { namespace cl { class OptionCategory; } }
    20 namespace kernel { class Kernel; }
    21 namespace kernel { class KernelBuilder; }
    22 namespace IDISA { class IDISA_Builder; }
     10#include "driver.h"
    2311
    24 class NVPTXDriver {
     12class NVPTXDriver final : public Driver {
    2513    friend class CBuilder;
    2614public:
     
    2816
    2917    ~NVPTXDriver();
     18
     19    void generatePipelineIR() override;
    3020   
    31     const std::unique_ptr<kernel::KernelBuilder> & getBuilder();
    32    
    33     parabix::ExternalBuffer * addExternalBuffer(std::unique_ptr<parabix::ExternalBuffer> b);
    34    
    35     parabix::StreamSetBuffer * addBuffer(std::unique_ptr<parabix::StreamSetBuffer> b);
    36    
    37     kernel::Kernel * addKernelInstance(std::unique_ptr<kernel::Kernel> kb);
    38    
    39     void addKernelCall(kernel::Kernel & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs);
    40 
    41     void makeKernelCall(kernel::Kernel * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs);
    42    
    43     void generatePipelineIR();
    44    
    45     template <typename ExternalFunctionType>
    46     llvm::Function * LinkFunction(kernel::Kernel & kb, llvm::StringRef name, ExternalFunctionType * functionPtr) const;
     21    void makeKernelCall(kernel::Kernel * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) override;
    4722
    4823    void finalizeAndCompile(llvm::Function * mainFunc, std::string PTXFilename);
    49    
     24
    5025    void * getPointerToMain();
    5126
    52 protected:
     27private:
    5328
    54     llvm::Function * LinkFunction(llvm::Module * mod, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
     29    llvm::Function * addLinkFunction(llvm::Module * mod, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const override;
    5530
    56 private:
    57     std::unique_ptr<llvm::LLVMContext>                      mContext;
    58     llvm::Module * const                                    mMainModule;
    59     std::unique_ptr<kernel::KernelBuilder>                  iBuilder;
    60     llvm::TargetMachine *                                   mTarget;
    61     llvm::ExecutionEngine *                                 mEngine;
    62 
    63     std::vector<kernel::Kernel *>                           mPipeline;
    64     // Owned kernels and buffers that will persist with this ParabixDriver instance.
    65     std::vector<std::unique_ptr<kernel::Kernel>>            mOwnedKernels;
    66     std::vector<std::unique_ptr<parabix::StreamSetBuffer>>  mOwnedBuffers;
    6731};
    6832
  • icGREP/icgrep-devel/icgrep/toolchain/object_cache.cpp

    r5440 r5464  
    11#include "object_cache.h"
    22#include <kernels/kernel.h>
     3#include <kernels/kernel_builder.h>
    34#include <llvm/Support/raw_ostream.h>
    45#include <llvm/Support/MemoryBuffer.h>
     6#include <llvm/IR/Metadata.h>
    57#include <llvm/Support/FileSystem.h>
    68#include <llvm/Support/Path.h>
     
    1012#include <fcntl.h>
    1113#include <boost/filesystem.hpp>
     14#include <boost/range/iterator_range.hpp>
    1215#include <ctime>
    1316
     
    5255                          '_'};
    5356
     57const static auto CACHEABLE = "cacheable";
     58
     59const static auto SIGNATURE = "signature";
     60
     61const static boost::uintmax_t CACHE_SIZE_LIMIT = 5 * 1024 * 1024;
     62
     63const MDString * getSignature(const llvm::Module * const M) {
     64    NamedMDNode * const sig = M->getNamedMetadata(SIGNATURE);
     65    if (sig) {
     66        assert ("empty metadata node" && sig->getNumOperands() == 1);
     67        assert ("no signature payload" && sig->getOperand(0)->getNumOperands() == 1);
     68        return cast<MDString>(sig->getOperand(0)->getOperand(0));
     69    }
     70    return nullptr;
     71}
     72
    5473bool ParabixObjectCache::loadCachedObjectFile(const std::unique_ptr<kernel::KernelBuilder> & idb, kernel::Kernel * const kernel) {
    5574    if (LLVM_LIKELY(kernel->isCachable())) {
    56 
    5775        Module * const module = kernel->getModule();
    5876        assert ("kernel module cannot be null!" && module);
    5977        const auto moduleId = module->getModuleIdentifier();
    60 
    6178        // Have we already seen this module before?
    62         if (LLVM_UNLIKELY(mCachedObjectMap.count(moduleId) != 0)) {
    63             const auto f = mKernelSignatureMap.find(moduleId);
    64             if (f == mKernelSignatureMap.end()) {
    65                 return kernel->moduleIDisSignature();
    66             } else if (kernel->moduleIDisSignature() || (kernel->makeSignature(idb) != f->second)) {
    67                 return false;
    68             }
     79        if (LLVM_UNLIKELY(mCachedObject.count(moduleId) != 0)) {
    6980            return true;
    7081        }
     
    7889        auto objectBuffer = MemoryBuffer::getFile(objectName.c_str(), -1, false);
    7990        if (objectBuffer) {
    80             if (!kernel->moduleIDisSignature()) {
     91            if (kernel->hasSignature()) {
    8192                sys::path::replace_extension(objectName, ".sig");
    8293                const auto signatureBuffer = MemoryBuffer::getFile(objectName.c_str(), -1, false);
     
    91102                }
    92103            }
    93             // updae the modified time of the file then add it to our cache
     104            // update the modified time of the file then add it to our cache
    94105            boost::filesystem::last_write_time(objectName.c_str(), time(0));
    95             mCachedObjectMap.emplace(moduleId, std::move(objectBuffer.get()));
     106            mCachedObject.emplace(moduleId, std::move(objectBuffer.get()));
    96107            return true;
    97         } else if (!kernel->moduleIDisSignature()) {
    98             mKernelSignatureMap.emplace(moduleId, kernel->makeSignature(idb));
     108        } else {
     109            // mark this module as cachable
     110            module->getOrInsertNamedMetadata(CACHEABLE);
     111            // if this module has a signature, add it to the metadata
     112            if (kernel->hasSignature()) {
     113                NamedMDNode * const md = module->getOrInsertNamedMetadata(SIGNATURE);
     114                assert (md->getNumOperands() == 0);
     115                MDString * const sig = MDString::get(module->getContext(), kernel->makeSignature(idb));               
     116                md->addOperand(MDNode::get(module->getContext(), {sig}));
     117            }
    99118        }
    100119    }
     
    105124// exists, write it out.
    106125void ParabixObjectCache::notifyObjectCompiled(const Module * M, MemoryBufferRef Obj) {
    107     const auto moduleId = M->getModuleIdentifier();
    108     if (mCachedObjectMap.count(moduleId) == 0) {
    109 
     126    if (M->getNamedMetadata(CACHEABLE)) {
     127        const auto moduleId = M->getModuleIdentifier();
    110128        Path objectName(mCachePath);
    111129        sys::path::append(objectName, CACHE_PREFIX);
     
    122140        outfile.close();
    123141
    124         // If this kernel has a signature, write it.
    125         const auto sig = mKernelSignatureMap.find(moduleId);
    126         if (LLVM_UNLIKELY(sig != mKernelSignatureMap.end())) {
     142        // If this module has a signature, write it.
     143        const MDString * const sig = getSignature(M);
     144        if (sig) {
    127145            sys::path::replace_extension(objectName, ".sig");
    128146            raw_fd_ostream sigfile(objectName, EC, sys::fs::F_None);
    129             sigfile << sig->second;
     147            sigfile << sig->getString();
    130148            sigfile.close();
    131149        }
     
    133151}
    134152
    135 /*  May need this.
    136 
    137 void ParabixObjectCache::removeCacheFile(std::string ModuleID) {
    138     Path CacheName(CacheDir);
    139     if (!getCacheFilename(ModuleID, CacheName)) return;
    140     sys::fs::remove(CacheName);
    141     // Also remove a signature file, if present.
    142     sys::path::replace_extension(CacheName, ".sig");
    143     sys::fs::remove(CacheName);
    144 }
    145 */
    146 
    147 std::unique_ptr<MemoryBuffer> ParabixObjectCache::getObject(const Module* M) {
    148     auto f = mCachedObjectMap.find(M->getModuleIdentifier());
    149     if (f == mCachedObjectMap.end()) {
     153void ParabixObjectCache::cleanUpObjectCacheFiles() {
     154
     155    using namespace boost::filesystem;
     156    using ObjectFile = std::pair<std::time_t, path>;
     157
     158    path cachePath(mCachePath.str());
     159    if (LLVM_LIKELY(is_directory(cachePath))) {
     160        std::vector<ObjectFile> files;
     161        for(const directory_entry & entry : boost::make_iterator_range(directory_iterator(cachePath), {})) {
     162            const auto path = entry.path();;
     163            if (LLVM_LIKELY(is_regular_file(path) && path.has_extension() && path.extension().compare(".o") == 0)) {
     164                files.emplace_back(last_write_time(path), path.filename());
     165            }
     166        }
     167        // sort the files in decending order of last modified (datetime) then file name
     168        std::sort(files.begin(), files.end(), std::greater<ObjectFile>());
     169        boost::uintmax_t cacheSize = 0;
     170        for(const ObjectFile & entry : files) {
     171            auto objectPath = cachePath / std::get<1>(entry);
     172            if (LLVM_LIKELY(exists(objectPath))) {
     173                const auto size = file_size(objectPath);
     174                if ((cacheSize + size) < CACHE_SIZE_LIMIT) {
     175                    cacheSize += size;
     176                } else {
     177                    remove(objectPath);
     178                    objectPath.replace_extension("sig");
     179                    remove(objectPath);
     180                }
     181            }
     182        }
     183    }
     184}
     185
     186std::unique_ptr<MemoryBuffer> ParabixObjectCache::getObject(const Module * module) {
     187    const auto moduleId = module->getModuleIdentifier();
     188    const auto f = mCachedObject.find(moduleId);
     189    if (f == mCachedObject.end()) {
    150190        return nullptr;
    151191    }
     
    157197    // $HOME/.cache/parabix/
    158198    Path cachePath;
    159     // TODO use path::user_cache_directory once we have llvm >= 3.7.
     199    #ifndef USE_LLVM_3_6
     200    sys::path::user_cache_directory(cachePath, "parabix");
     201    #else
    160202    sys::path::home_directory(cachePath);
    161203    sys::path::append(cachePath, ".cache", "parabix");
     204    #endif
    162205    return cachePath;
    163206}
  • icGREP/icgrep-devel/icgrep/toolchain/object_cache.h

    r5440 r5464  
    1111#include <llvm/ExecutionEngine/ObjectCache.h>
    1212#include <llvm/ADT/StringRef.h>
     13#include <boost/container/flat_map.hpp>
     14#include <vector>
    1315#include <string>
    14 #include <boost/container/flat_map.hpp>
    1516
    1617namespace llvm { class Module; }
     
    3637    template <typename K, typename V>
    3738    using Map = boost::container::flat_map<K, V>;
    38     using CacheEntry = std::pair<kernel::Kernel *, std::unique_ptr<llvm::MemoryBuffer>>;
    39     using CacheMap = Map<llvm::Module *, CacheEntry>;
     39    using ModuleCache = Map<std::string, std::unique_ptr<llvm::MemoryBuffer>>;
    4040public:
    4141    ParabixObjectCache();
     
    4343    bool loadCachedObjectFile(const std::unique_ptr<kernel::KernelBuilder> & idb, kernel::Kernel * const kernel);
    4444    void notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj) override;
     45    void cleanUpObjectCacheFiles();
    4546    std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module * M) override;
    4647protected:
    4748    static Path getDefaultPath();
    4849private:
    49     Map<std::string, std::string>                           mKernelSignatureMap;
    50     Map<std::string, std::unique_ptr<llvm::MemoryBuffer>>   mCachedObjectMap;
    51     const Path                                              mCachePath;
     50    ModuleCache     mCachedObject;
     51    const Path      mCachePath;
    5252};
    5353
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r5456 r5464  
    7777    }
    7878    Value * const segOffset = iBuilder->CreateLoad(iBuilder->CreateGEP(threadStruct, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
     79
     80
     81
    7982
    8083    BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", threadFunc);
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp

    r5459 r5464  
    55 */
    66
    7 #include "toolchain.h"
    8 #include <IR_Gen/idisa_target.h>
    9 #include <llvm/CodeGen/CommandFlags.h>             // for InitTargetOptionsF...
    10 #include <llvm/ExecutionEngine/ExecutionEngine.h>  // for EngineBuilder
    11 #include <llvm/Support/CommandLine.h>              // for OptionCategory
    12 #include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
    13 #include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
    14 #include <llvm/IR/LegacyPassManager.h>             // for PassManager
    15 #include <llvm/IR/IRPrintingPasses.h>
    16 #include <llvm/InitializePasses.h>                 // for initializeCodeGen
    17 #include <llvm/PassRegistry.h>                     // for PassRegistry
    18 #include <llvm/Support/CodeGen.h>                  // for Level, Level::None
    19 #include <llvm/Support/Compiler.h>                 // for LLVM_UNLIKELY
    20 #include <llvm/Target/TargetMachine.h>             // for TargetMachine, Tar...
    21 #include <llvm/Target/TargetOptions.h>             // for TargetOptions
    22 #include <llvm/Transforms/Scalar.h>
    23 #include <llvm/Transforms/Utils/Local.h>
    24 #include <llvm/IR/Module.h>
    25 #include <toolchain/object_cache.h>
    26 #include <toolchain/pipeline.h>
    27 #include <kernels/kernel_builder.h>
    28 #include <kernels/kernel.h>
    29 #include <sys/stat.h>
    30 #include <llvm/IR/Verifier.h>
    31 #include <toolchain/NVPTXDriver.cpp>
    32 //#include <toolchain/workqueue.h>
    33 
     7#include <toolchain/toolchain.h>
     8#include <llvm/CodeGen/CommandFlags.h>
     9#include <llvm/Support/raw_ostream.h>
    3410
    3511using namespace llvm;
    36 using namespace parabix;
    37 
    38 using Kernel = kernel::Kernel;
    39 using KernelBuilder = kernel::KernelBuilder;
    4012
    4113#ifndef NDEBUG
     
    5931                        clEnumValEnd), cl::cat(CodeGenOptions));
    6032
    61 static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
     33static cl::opt<std::string> IROutputFilenameOption("dump-generated-IR-output", cl::init(""),
     34                                                       cl::desc("output IR filename"), cl::cat(CodeGenOptions));
     35
    6236#ifndef USE_LLVM_3_6
    63 static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
    64 static cl::opt<bool> AsmVerbose("asm-verbose",
    65                                 cl::desc("Add comments to directives."),
    66                                 cl::init(true), cl::cat(CodeGenOptions));
     37static cl::opt<std::string> ASMOutputFilenameOption("asm-output", cl::init(""),
     38                                                    cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
     39
     40static cl::opt<bool> AsmVerbose("asm-verbose", cl::init(true),
     41                                cl::desc("Add comments to directives."), cl::cat(CodeGenOptions));
    6742#endif
    6843
    69 char OptLevel;
    70 static cl::opt<char, true> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"), cl::location(OptLevel),
    71                               cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
     44static cl::opt<char> OptLevelOption("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O1')"),
     45                                    cl::cat(CodeGenOptions), cl::Prefix, cl::ZeroOrMore, cl::init('1'));
    7246
    7347
    74 static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
     48static cl::opt<bool> EnableObjectCacheOption("enable-object-cache",
     49                                             cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
    7550
    76 static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
     51static cl::opt<std::string> ObjectCacheDirOption("object-cache-dir",
     52                                                 cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
    7753
    7854
     55static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0),
     56                                          cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
     57
     58
     59static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize),
     60                                            cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
     61
     62static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::init(1),
     63                                               cl::desc("Buffer Segments"), cl::value_desc("positive integer"));
     64
     65
     66static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::init(2),
     67                                          cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"));
     68
     69
     70static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::init(IN_DEBUG_MODE),
     71                                               cl::desc("Enable Asserts"));
     72
     73static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::init(false),
     74                                                  cl::desc("Count and report CPU cycles per kernel"), cl::cat(CodeGenOptions));
     75
     76static cl::opt<bool, true> pipelineParallelOption("enable-pipeline-parallel", cl::location(pipelineParallel),
     77                                                  cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
     78   
     79static cl::opt<bool, true> segmentPipelineParallelOption("enable-segment-pipeline-parallel", cl::location(segmentPipelineParallel),
     80                                                         cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
     81
     82static cl::opt<bool> USENVPTX("NVPTX", cl::init(false),
     83                              cl::desc("Run on GPU only."));
     84
     85static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::init(256),
     86                                         cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"));
     87
     88
     89const CodeGenOpt::Level OptLevel = [](const char optLevel) {
     90    switch (optLevel) {
     91        case '0': return CodeGenOpt::None;
     92        case '1': return CodeGenOpt::Less;
     93        case '2': return CodeGenOpt::Default;
     94        case '3': return CodeGenOpt::Aggressive;
     95        default: report_fatal_error(optLevel + " is an invalid optimization level.");
     96    }
     97}(OptLevelOption);
     98
     99bool pipelineParallel;
     100bool segmentPipelineParallel;
     101const std::string ASMOutputFilename = ASMOutputFilenameOption;
     102const std::string IROutputFilename = IROutputFilenameOption;
     103const std::string ObjectCacheDir = ObjectCacheDirOption;
    79104int BlockSize;
    80105int SegmentSize;
     
    83108bool EnableAsserts;
    84109bool EnableCycleCounter;
     110const bool EnableObjectCache = EnableObjectCacheOption && (DebugOptions.getBits() == 0);
     111bool NVPTX;
     112int GroupNum;
    85113
    86 static cl::opt<int, true> BlockSizeOption("BlockSize", cl::location(BlockSize), cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(CodeGenOptions));
    87 static cl::opt<int, true> SegmentSizeOption("segment-size", cl::location(SegmentSize), cl::desc("Segment Size"), cl::value_desc("positive integer"), cl::init(1));
    88 static cl::opt<int, true> BufferSegmentsOption("buffer-segments", cl::location(BufferSegments), cl::desc("Buffer Segments"), cl::value_desc("positive integer"), cl::init(1));
    89 static cl::opt<int, true> ThreadNumOption("thread-num", cl::location(ThreadNum), cl::desc("Number of threads used for segment pipeline parallel"), cl::value_desc("positive integer"), cl::init(2));
    90 static cl::opt<bool, true> EnableAssertsOption("ea", cl::location(EnableAsserts), cl::desc("Enable Asserts"), cl::init(IN_DEBUG_MODE));
    91 static cl::opt<bool, true> EnableCycleCountOption("ShowKernelCycles", cl::location(EnableCycleCounter), cl::desc("Count and report CPU cycles per kernel"), cl::init(false), cl::cat(CodeGenOptions));
     114const llvm::Reloc::Model RelocModel = ::RelocModel;
     115const llvm::CodeModel::Model CMModel = ::CMModel;
     116const std::string MArch = ::MArch;
     117const std::string RunPass = ::RunPass;
     118const llvm::TargetMachine::CodeGenFileType FileType = ::FileType;
     119const std::string StopAfter = ::StopAfter;
     120const std::string StartAfter = ::StartAfter;
     121#ifndef USE_LLVM_3_6
     122const TargetOptions Options = [](const bool asmVerbose) {
     123    TargetOptions opt = InitTargetOptionsFromCodeGenFlags();
     124    opt.MCOptions.AsmVerbose = AsmVerbose;
     125    return opt;
     126}(AsmVerbose);
     127#else
     128const TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
     129#endif
    92130
    93 const cl::OptionCategory * codegen_flags() {return &CodeGenOptions;}
     131const cl::OptionCategory * codegen_flags() {
     132    return &CodeGenOptions;
     133}
    94134
    95 bool DebugOptionIsSet(DebugFlags flag) {return DebugOptions.isSet(flag);}
     135bool DebugOptionIsSet(const DebugFlags flag) {
     136    return DebugOptions.isSet(flag);
     137}
    96138
    97 static cl::opt<bool> pipelineParallel("enable-pipeline-parallel", cl::desc("Enable multithreading with pipeline parallelism."), cl::cat(CodeGenOptions));
    98    
    99 static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
     139std::string getCPUStr() {
     140    return ::getCPUStr();
     141}
    100142
    101 bool NVPTX;
    102 int GroupNum;
    103 static cl::opt<bool> USENVPTX("NVPTX", cl::desc("Run on GPU only."), cl::init(false));
    104 static cl::opt<int, true> GroupNumOption("group-num", cl::location(GroupNum), cl::desc("NUmber of groups declared on GPU"), cl::value_desc("positive integer"), cl::init(256));
     143std::string getFeaturesStr() {
     144    return ::getFeaturesStr();
     145}
     146
     147void setFunctionAttributes(llvm::StringRef CPU, llvm::StringRef Features, llvm::Module &M) {
     148    return ::setFunctionAttributes(CPU, Features, M);
     149}
     150
    105151
    106152}
    107153
    108 void setNVPTXOption(){
     154void setNVPTXOption() {
    109155    codegen::NVPTX = codegen::USENVPTX;
    110     if(codegen::NVPTX){
    111 #ifndef CUDA_ENABLED
    112     errs() << "CUDA compiler is not supported.\n";
    113     exit(-1);
    114 #endif
     156    if (codegen::NVPTX) {
     157        #ifndef CUDA_ENABLED
     158        report_fatal_error("CUDA compiler is not supported.");
     159        #endif
    115160    }
    116161}
     
    125170}
    126171
    127 void setAllFeatures(EngineBuilder &builder) {
    128     StringMap<bool> HostCPUFeatures;
    129     if (sys::getHostCPUFeatures(HostCPUFeatures)) {
    130         std::vector<std::string> attrs;
    131         for (auto &flag : HostCPUFeatures) {
    132             auto enabled = flag.second ? "+" : "-";
    133             attrs.push_back(enabled + flag.first().str());
    134         }
    135         builder.setMAttrs(attrs);
    136     }
    137 }
    138 
    139172bool AVX2_available() {
    140173    StringMap<bool> HostCPUFeatures;
     
    145178    return false;
    146179}
    147 
    148 ParabixDriver::ParabixDriver(std::string && moduleName)
    149 : mContext(new llvm::LLVMContext())
    150 , mMainModule(new Module(moduleName, *mContext))
    151 , iBuilder(nullptr)
    152 , mTarget(nullptr)
    153 , mEngine(nullptr)
    154 , mCache(nullptr) {
    155 
    156     InitializeNativeTarget();
    157     InitializeNativeTargetAsmPrinter();
    158     InitializeNativeTargetAsmParser();
    159 
    160     PassRegistry * Registry = PassRegistry::getPassRegistry();
    161     initializeCore(*Registry);
    162     initializeCodeGen(*Registry);
    163     initializeLowerIntrinsicsPass(*Registry);
    164 
    165     std::string errMessage;
    166     EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
    167     builder.setUseOrcMCJITReplacement(true);
    168     builder.setErrorStr(&errMessage);
    169     TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
    170     opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
    171     builder.setTargetOptions(opts);
    172     builder.setVerifyModules(false);
    173     CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
    174     switch (codegen::OptLevel) {
    175         case '0': optLevel = CodeGenOpt::None; break;
    176         case '1': optLevel = CodeGenOpt::Less; break;
    177         case '2': optLevel = CodeGenOpt::Default; break;
    178         case '3': optLevel = CodeGenOpt::Aggressive; break;
    179         default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
    180     }
    181     builder.setOptLevel(optLevel);
    182     setAllFeatures(builder);
    183     mEngine = builder.create();
    184     if (mEngine == nullptr) {
    185         throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
    186     }
    187     mTarget = builder.selectTarget();
    188     if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
    189         if (codegen::ObjectCacheDir.empty()) {
    190             mCache = new ParabixObjectCache();
    191         } else {
    192             mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
    193         }
    194         mEngine->setObjectCache(mCache);
    195     }
    196 
    197     mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple());
    198 
    199     iBuilder.reset(IDISA::GetIDISA_Builder(*mContext, mMainModule->getTargetTriple()));
    200     iBuilder->setDriver(this);
    201     iBuilder->setModule(mMainModule);
    202 }
    203 
    204 ExternalBuffer * ParabixDriver::addExternalBuffer(std::unique_ptr<ExternalBuffer> b) {
    205     mOwnedBuffers.emplace_back(std::move(b));
    206     return cast<ExternalBuffer>(mOwnedBuffers.back().get());
    207 }
    208 
    209 StreamSetBuffer * ParabixDriver::addBuffer(std::unique_ptr<StreamSetBuffer> b) {
    210     b->allocateBuffer(iBuilder);
    211     mOwnedBuffers.emplace_back(std::move(b));
    212     return mOwnedBuffers.back().get();
    213 }
    214 
    215 Kernel * ParabixDriver::addKernelInstance(std::unique_ptr<Kernel> kb) {
    216     mOwnedKernels.emplace_back(std::move(kb));
    217     return mOwnedKernels.back().get();
    218 }
    219 
    220 void ParabixDriver::addKernelCall(Kernel & kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
    221     assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
    222     mPipeline.emplace_back(&kb);
    223     kb.bindPorts(inputs, outputs);
    224     kb.makeModule(iBuilder);
    225 }
    226 
    227 void ParabixDriver::makeKernelCall(Kernel * kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
    228     assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
    229     mPipeline.emplace_back(kb);   
    230     kb->bindPorts(inputs, outputs);
    231     kb->makeModule(iBuilder);
    232 }
    233 
    234 void ParabixDriver::generatePipelineIR() {
    235     #ifndef NDEBUG
    236     if (LLVM_UNLIKELY(mPipeline.empty())) {
    237         report_fatal_error("Pipeline cannot be empty");
    238     } else {
    239         for (auto i = mPipeline.begin(); i != mPipeline.end(); ++i) {
    240             for (auto j = i; ++j != mPipeline.end(); ) {
    241                 if (LLVM_UNLIKELY(*i == *j)) {
    242                     report_fatal_error("Kernel instances cannot occur twice in the pipeline");
    243                 }
    244             }
    245         }
    246     }
    247     #endif
    248     // note: instantiation of all kernels must occur prior to initialization
    249     for (const auto & k : mPipeline) {
    250         k->addKernelDeclarations(iBuilder);
    251     }
    252     for (const auto & k : mPipeline) {
    253         k->createInstance(iBuilder);
    254     }
    255     for (const auto & k : mPipeline) {
    256         k->initializeInstance(iBuilder);
    257     }
    258     if (codegen::pipelineParallel) {
    259         generateParallelPipeline(iBuilder, mPipeline);
    260     } else if (codegen::segmentPipelineParallel) {
    261         generateSegmentParallelPipeline(iBuilder, mPipeline);
    262     } else {
    263         codegen::ThreadNum = 1;
    264         generatePipelineLoop(iBuilder, mPipeline);
    265     }
    266     for (const auto & k : mPipeline) {
    267         k->finalizeInstance(iBuilder);
    268     }
    269 }
    270 
    271 Function * ParabixDriver::LinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
    272     assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr));
    273     Function * f = cast<Function>(mod->getOrInsertFunction(name, type));
    274     mEngine->addGlobalMapping(f, functionPtr);
    275     return f;
    276 }
    277 
    278 void ParabixDriver::linkAndFinalize() {
    279 
    280     legacy::PassManager PM;
    281     std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
    282     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowUnoptimizedIR))) {
    283         if (codegen::IROutputFilename.empty()) {
    284             IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    285         } else {
    286             std::error_code error;
    287             IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
    288         }
    289         PM.add(createPrintModulePass(*IROutputStream));
    290     }
    291 
    292     if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
    293         PM.add(createVerifierPass());
    294     }
    295     PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
    296     PM.add(createReassociatePass());             //Reassociate expressions.
    297     PM.add(createGVNPass());                     //Eliminate common subexpressions.
    298     PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    299     PM.add(createCFGSimplificationPass());
    300     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
    301         if (LLVM_LIKELY(IROutputStream == nullptr)) {
    302             if (codegen::IROutputFilename.empty()) {
    303                 IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    304             } else {
    305                 std::error_code error;
    306                 IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
    307             }
    308         }
    309         PM.add(createPrintModulePass(*IROutputStream));
    310     }
    311 
    312     #ifndef USE_LLVM_3_6
    313     std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
    314     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
    315         if (codegen::ASMOutputFilename.empty()) {
    316             ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    317         } else {
    318             std::error_code error;
    319             ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
    320         }
    321         if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
    322             report_fatal_error("LLVM error: could not add emit assembly pass");
    323         }
    324     }
    325     #endif
    326 
    327     Module * module = nullptr;
    328 
    329     try {
    330 
    331         for (Kernel * const kernel : mPipeline) {
    332             iBuilder->setKernel(kernel);
    333             module = kernel->getModule();
    334             bool uncachedObject = true;
    335             if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
    336                 uncachedObject = false;
    337             }
    338             if (uncachedObject) {
    339                 module->setTargetTriple(mMainModule->getTargetTriple());
    340                 kernel->generateKernel(iBuilder);
    341                 PM.run(*module);
    342             }
    343             mEngine->addModule(std::unique_ptr<Module>(module));
    344             mEngine->generateCodeForModule(module);
    345         }
    346 
    347         iBuilder->setKernel(nullptr);
    348         module = mMainModule;
    349         PM.run(*mMainModule);
    350 
    351         mEngine->finalizeObject();
    352 
    353     } catch (const std::exception & e) {
    354         report_fatal_error(e.what());
    355     }
    356 
    357 }
    358 
    359 
    360 //void ParabixDriver::linkAndFinalize() {
    361 
    362 //    legacy::PassManager PM;
    363 //    if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
    364 //        PM.add(createVerifierPass());
    365 //    }
    366 //    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
    367 //    PM.add(createReassociatePass());             //Reassociate expressions.
    368 //    PM.add(createGVNPass());                     //Eliminate common subexpressions.
    369 //    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    370 //    PM.add(createCFGSimplificationPass());
    371 
    372 //    unsigned threadCount = 2; //std::thread::hardware_concurrency();
    373 
    374 //    std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
    375 //    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
    376 //        threadCount = 1; // If we're dumping IR, disable seperate compilation
    377 //        if (codegen::IROutputFilename.empty()) {
    378 //            IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    379 //        } else {
    380 //            std::error_code error;
    381 //            IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
    382 //        }
    383 //        PM.add(createPrintModulePass(*IROutputStream));
    384 //    }
    385 
    386 //    #ifndef USE_LLVM_3_6
    387 //    std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
    388 //    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
    389 //        threadCount = 1; // If we're dumping ASM, disable seperate compilation
    390 //        if (codegen::ASMOutputFilename.empty()) {
    391 //            ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    392 //        } else {
    393 //            std::error_code error;
    394 //            ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
    395 //        }
    396 //        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
    397 //            report_fatal_error("LLVM error: could not add emit assembly pass");
    398 //        }
    399 //    }
    400 //    #endif
    401 
    402 //    Module * module = mMainModule;
    403 //    WorkQueue<Module *> Q(mPipeline.size());
    404 //    std::thread compilation_thread[threadCount - 1];
    405 
    406 //    try {
    407 
    408 //        for (unsigned i = 0; i < (threadCount - 1); ++i) {
    409 //            compilation_thread[i] = std::thread([this, &Q]{
    410 
    411 //                InitializeNativeTarget();
    412 
    413 //                Module * module = nullptr;
    414 //                while (Q.pop(module)) {
    415 //                    mEngine->addModule(std::unique_ptr<Module>(module));
    416 //                    mEngine->generateCodeForModule(module);
    417 //                }
    418 //            });
    419 //        }
    420 
    421 //        module = mMainModule;
    422 //        iBuilder->setKernel(nullptr);
    423 //        PM.run(*mMainModule);
    424 //        Q.push(mMainModule);
    425 
    426 //        for (Kernel * const kernel : mPipeline) {
    427 //            iBuilder->setKernel(kernel);
    428 //            module = kernel->getModule();
    429 //            bool uncachedObject = true;
    430 //            if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
    431 //                uncachedObject = false;
    432 //            }
    433 //            if (uncachedObject) {
    434 //                module->setTargetTriple(mMainModule->getTargetTriple());
    435 //                kernel->generateKernel(iBuilder);
    436 //                PM.run(*module);
    437 //            }
    438 //            Q.push(module);
    439 //        }
    440 
    441 //        for (;;) {
    442 //            if (Q.empty()) {
    443 //                break;
    444 //            } else if (Q.try_pop(module)) {
    445 //                mEngine->addModule(std::unique_ptr<Module>(module));
    446 //                mEngine->generateCodeForModule(module);
    447 //            }
    448 //        }
    449 
    450 //        Q.notify_all();
    451 //        for (unsigned i = 0; i < (threadCount - 1); ++i) {
    452 //            compilation_thread[i].join();
    453 //        }
    454 
    455 //        assert (Q.empty());
    456 
    457 //        mEngine->finalizeObject();
    458 
    459 //    } catch (const std::exception & e) {
    460 //        module->dump();
    461 //        report_fatal_error(e.what());
    462 //    }
    463 
    464 //}
    465 
    466 const std::unique_ptr<KernelBuilder> & ParabixDriver::getBuilder() {
    467     return iBuilder;
    468 }
    469 
    470 void * ParabixDriver::getPointerToMain() {
    471     return mEngine->getPointerToNamedFunction("Main");
    472 }
    473 
    474 ParabixDriver::~ParabixDriver() {
    475     delete mCache;
    476 }
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.h

    r5458 r5464  
    77#ifndef TOOLCHAIN_H
    88#define TOOLCHAIN_H
    9 #include <string>
    10 #include <IR_Gen/FunctionTypeBuilder.h>
    11 #include <kernels/kernel.h>
    12 #include <kernels/streamset.h>
    139
    14 #include <toolchain/NVPTXDriver.h>
    15 namespace llvm { class ExecutionEngine; }
    16 namespace llvm { class Function; }
    17 namespace llvm { class Module; }
    18 namespace llvm { class TargetMachine; }
    19 namespace llvm { class formatted_raw_ostream; }
     10#include <llvm/ADT/StringRef.h>
     11#include <llvm/Support/CodeGen.h>
     12#include <llvm/Target/TargetOptions.h>
     13#include <llvm/Target/TargetMachine.h>
     14
     15// FIXME: llvm/CodeGen/CommandFlags.h can only be included once or the various cl::opt causes multiple definition
     16// errors. To bypass for now, the relevant options and functions are accessible from here. Re-evaluate with later
     17// versions of LLVM.
     18
    2019namespace llvm { namespace cl { class OptionCategory; } }
    21 namespace kernel { class Kernel; }
    22 namespace kernel { class KernelBuilder; }
    23 namespace IDISA { class IDISA_Builder; }
    24 
    25 class ParabixObjectCache;
    2620
    2721namespace codegen {
     22
    2823const llvm::cl::OptionCategory * codegen_flags();
    2924
     
    3934};
    4035
    41 bool DebugOptionIsSet(DebugFlags flag);
     36bool DebugOptionIsSet(const DebugFlags flag);
    4237
    43 
    44 extern char OptLevel;  // set from command line
     38extern bool pipelineParallel;
     39extern bool segmentPipelineParallel;
     40#ifndef USE_LLVM_3_6
     41extern const std::string ASMOutputFilename;
     42#endif
     43extern const std::string IROutputFilename;
     44extern const std::string ObjectCacheDir;
     45extern const llvm::CodeGenOpt::Level OptLevel;  // set from command line
    4546extern int BlockSize;  // set from command line
    4647extern int SegmentSize;  // set from command line
    4748extern int BufferSegments;
    4849extern int ThreadNum;
     50extern const bool EnableObjectCache;
    4951extern bool EnableAsserts;
    5052extern bool EnableCycleCounter;
    5153extern bool NVPTX;
    5254extern int GroupNum;
     55extern const llvm::TargetOptions Options;
     56extern const llvm::Reloc::Model RelocModel;
     57extern const llvm::CodeModel::Model CMModel;
     58extern const std::string MArch;
     59extern const std::string RunPass;
     60extern const llvm::TargetMachine::CodeGenFileType FileType;
     61extern const std::string StopAfter;
     62extern const std::string StartAfter;
     63
     64std::string getCPUStr();
     65std::string getFeaturesStr();
     66void setFunctionAttributes(llvm::StringRef CPU, llvm::StringRef Features, llvm::Module &M);
     67
    5368}
    5469
     
    6075bool AVX2_available();
    6176
    62 class ParabixDriver {
    63     friend class CBuilder;
    64 public:
    65     ParabixDriver(std::string && moduleName);
    66 
    67     ~ParabixDriver();
    68    
    69     const std::unique_ptr<kernel::KernelBuilder> & getBuilder();
    70    
    71     parabix::ExternalBuffer * addExternalBuffer(std::unique_ptr<parabix::ExternalBuffer> b);
    72    
    73     parabix::StreamSetBuffer * addBuffer(std::unique_ptr<parabix::StreamSetBuffer> b);
    74    
    75     kernel::Kernel * addKernelInstance(std::unique_ptr<kernel::Kernel> kb);
    76    
    77     void addKernelCall(kernel::Kernel & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs);
    78 
    79     void makeKernelCall(kernel::Kernel * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs);
    80    
    81     void generatePipelineIR();
    82    
    83     template <typename ExternalFunctionType>
    84     llvm::Function * LinkFunction(kernel::Kernel & kb, llvm::StringRef name, ExternalFunctionType * functionPtr) const;
    85 
    86     void linkAndFinalize();
    87    
    88     void * getPointerToMain();
    89 
    90 protected:
    91 
    92     llvm::Function * LinkFunction(llvm::Module * mod, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
    93 
    94 private:
    95     std::unique_ptr<llvm::LLVMContext>                      mContext;
    96     llvm::Module * const                                    mMainModule;
    97     std::unique_ptr<kernel::KernelBuilder>                  iBuilder;
    98     llvm::TargetMachine *                                   mTarget;
    99     llvm::ExecutionEngine *                                 mEngine;
    100     ParabixObjectCache *                                    mCache;
    101 
    102     std::vector<kernel::Kernel *>                           mPipeline;
    103     // Owned kernels and buffers that will persist with this ParabixDriver instance.
    104     std::vector<std::unique_ptr<kernel::Kernel>>            mOwnedKernels;
    105     std::vector<std::unique_ptr<parabix::StreamSetBuffer>>  mOwnedBuffers;
    106 };
    107 
    108 template <typename ExternalFunctionType>
    109 llvm::Function * ParabixDriver::LinkFunction(kernel::Kernel & kb, llvm::StringRef name, ExternalFunctionType * functionPtr) const {
    110     llvm::FunctionType * const type = FunctionTypeBuilder<ExternalFunctionType>::get(*mContext.get());
    111     assert ("FunctionTypeBuilder did not resolve a function type." && type);
    112     return LinkFunction(kb.getModule(), name, type, reinterpret_cast<void *>(functionPtr));
    113 }
    114 
    11577#endif
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5440 r5464  
    2424#include <pablo/pe_zeroes.h>
    2525#include <toolchain/toolchain.h>
    26 #include "kernels/streamset.h"                     // for CircularBuffer
    27 #include "llvm/ADT/StringRef.h"                    // for StringRef
    28 #include "llvm/IR/CallingConv.h"                   // for ::C
    29 #include "llvm/IR/DerivedTypes.h"                  // for ArrayType, Pointer...
    30 #include "llvm/IR/LLVMContext.h"                   // for LLVMContext
    31 #include "llvm/IR/Value.h"                         // for Value
    32 #include "llvm/Support/Compiler.h"                 // for LLVM_UNLIKELY
    33 #include <pablo/builder.hpp>                       // for PabloBuilder
     26#include <toolchain/cpudriver.h>
     27#include <kernels/streamset.h>
     28#include <llvm/ADT/StringRef.h>
     29#include <llvm/IR/CallingConv.h>
     30#include <llvm/IR/DerivedTypes.h>
     31#include <llvm/IR/LLVMContext.h>
     32#include <llvm/IR/Value.h>
     33#include <llvm/Support/Compiler.h>
     34#include <pablo/builder.hpp>
    3435#include <boost/interprocess/anonymous_shared_memory.hpp>
    3536#include <boost/interprocess/mapped_region.hpp>
     
    5253    U8U16Kernel(const std::unique_ptr<kernel::KernelBuilder> & b);
    5354    bool isCachable() const override { return true; }
    54     bool moduleIDisSignature() const override { return true; }
     55    bool hasSignature() const override { return false; }
    5556    void generatePabloMethod() override;
    5657};
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5457 r5464  
    1111#include <llvm/IR/Function.h>
    1212#include <llvm/IR/Module.h>
    13 #include <llvm/ExecutionEngine/ExecutionEngine.h>
    14 #include "llvm/Linker/Linker.h"
     13// #include <llvm/ExecutionEngine/ExecutionEngine.h>
     14// #include <llvm/Linker/Linker.h>
    1515#include <llvm/Support/CommandLine.h>
    1616#include <llvm/Support/raw_ostream.h>
     
    2424#include <pablo/pablo_compiler.h>
    2525#include <pablo/pablo_toolchain.h>
     26#include <toolchain/cpudriver.h>
    2627#include <fcntl.h>
    2728
     
    8687    WordCountKernel(const std::unique_ptr<kernel::KernelBuilder> & b);
    8788    bool isCachable() const override { return true; }
    88     bool moduleIDisSignature() const override { return true; }
     89    bool hasSignature() const override { return false; }
    8990protected:
    9091    void generatePabloMethod() override;
Note: See TracChangeset for help on using the changeset viewer.